Skip to content

Commit da9af2e

Browse files
authored
Merge pull request #390 from onel/reference-docs-20250901_145129
Reference documentation batch
2 parents 62efcd7 + 8b2dd7b commit da9af2e

File tree

5 files changed

+829
-27
lines changed

5 files changed

+829
-27
lines changed

libs/python/computer-server/computer_server/diorama/diorama.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,39 @@
2020
automation_handler = MacOSAutomationHandler()
2121

2222
class Diorama:
23+
"""Virtual desktop manager that provides automation capabilities for macOS applications.
24+
25+
Manages application windows and provides an interface for taking screenshots,
26+
mouse interactions, keyboard input, and coordinate transformations between
27+
screenshot space and screen space.
28+
"""
2329
_scheduler_queue = None
2430
_scheduler_task = None
2531
_loop = None
2632
_scheduler_started = False
2733

2834
@classmethod
2935
def create_from_apps(cls, *args) -> DioramaComputer:
36+
"""Create a DioramaComputer instance from the given application names.
37+
38+
Args:
39+
*args: Variable number of application names to include in the desktop
40+
41+
Returns:
42+
DioramaComputer: A computer interface for the specified applications
43+
"""
3044
cls._ensure_scheduler()
3145
return cls(args).computer
3246

3347
# Dictionary to store cursor positions for each unique app_list hash
3448
_cursor_positions = {}
3549

3650
def __init__(self, app_list):
51+
"""Initialize a Diorama instance for the specified applications.
52+
53+
Args:
54+
app_list: List of application names to manage
55+
"""
3756
self.app_list = app_list
3857
self.interface = self.Interface(self)
3958
self.computer = DioramaComputer(self)
@@ -48,6 +67,10 @@ def __init__(self, app_list):
4867

4968
@classmethod
5069
def _ensure_scheduler(cls):
70+
"""Ensure the async scheduler loop is running.
71+
72+
Creates and starts the scheduler task if it hasn't been started yet.
73+
"""
5174
if not cls._scheduler_started:
5275
logger.info("Starting Diorama scheduler loop…")
5376
cls._scheduler_queue = asyncio.Queue()
@@ -57,6 +80,11 @@ def _ensure_scheduler(cls):
5780

5881
@classmethod
5982
async def _scheduler_loop(cls):
83+
"""Main scheduler loop that processes automation commands.
84+
85+
Continuously processes commands from the scheduler queue, handling
86+
screenshots, mouse actions, keyboard input, and scrolling operations.
87+
"""
6088
while True:
6189
cmd = await cls._scheduler_queue.get()
6290
action = cmd.get("action")
@@ -144,13 +172,33 @@ async def _scheduler_loop(cls):
144172
future.set_exception(e)
145173

146174
class Interface():
175+
"""Interface for interacting with the virtual desktop.
176+
177+
Provides methods for taking screenshots, mouse interactions, keyboard input,
178+
and coordinate transformations between screenshot and screen coordinates.
179+
"""
180+
147181
def __init__(self, diorama):
182+
"""Initialize the interface with a reference to the parent Diorama instance.
183+
184+
Args:
185+
diorama: The parent Diorama instance
186+
"""
148187
self._diorama = diorama
149188

150189
self._scene_hitboxes = []
151190
self._scene_size = None
152191

153192
async def _send_cmd(self, action, arguments=None):
193+
"""Send a command to the scheduler queue.
194+
195+
Args:
196+
action (str): The action to perform
197+
arguments (dict, optional): Arguments for the action
198+
199+
Returns:
200+
The result of the command execution
201+
"""
154202
Diorama._ensure_scheduler()
155203
loop = asyncio.get_event_loop()
156204
future = loop.create_future()
@@ -167,6 +215,14 @@ async def _send_cmd(self, action, arguments=None):
167215
return None
168216

169217
async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]:
218+
"""Take a screenshot of the managed applications.
219+
220+
Args:
221+
as_bytes (bool): If True, return the screenshot as a base64-encoded string; if False, return a PIL Image
222+
223+
Returns:
224+
Union[str, Image.Image]: Base64-encoded PNG data as a string, or a PIL Image object
225+
"""
170226
import base64
171227
result, img = await self._send_cmd("screenshot")
172228
self._scene_hitboxes = result.get("hitboxes", [])
@@ -184,6 +240,12 @@ async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]:
184240
return img
185241

186242
async def left_click(self, x, y):
243+
"""Perform a left mouse click at the specified coordinates.
244+
245+
Args:
246+
x (int): X coordinate in screenshot space (or None to use last position)
247+
y (int): Y coordinate in screenshot space (or None to use last position)
248+
"""
187249
# Get last cursor position for this app_list hash
188250
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
189251
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -195,6 +257,12 @@ async def left_click(self, x, y):
195257
await self._send_cmd("left_click", {"x": sx, "y": sy})
196258

197259
async def right_click(self, x, y):
260+
"""Perform a right mouse click at the specified coordinates.
261+
262+
Args:
263+
x (int): X coordinate in screenshot space (or None to use last position)
264+
y (int): Y coordinate in screenshot space (or None to use last position)
265+
"""
198266
# Get last cursor position for this app_list hash
199267
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
200268
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -206,6 +274,12 @@ async def right_click(self, x, y):
206274
await self._send_cmd("right_click", {"x": sx, "y": sy})
207275

208276
async def double_click(self, x, y):
277+
"""Perform a double mouse click at the specified coordinates.
278+
279+
Args:
280+
x (int): X coordinate in screenshot space (or None to use last position)
281+
y (int): Y coordinate in screenshot space (or None to use last position)
282+
"""
209283
# Get last cursor position for this app_list hash
210284
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
211285
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -217,6 +291,12 @@ async def double_click(self, x, y):
217291
await self._send_cmd("double_click", {"x": sx, "y": sy})
218292

219293
async def move_cursor(self, x, y):
294+
"""Move the mouse cursor to the specified coordinates.
295+
296+
Args:
297+
x (int): X coordinate in screenshot space (or None to use last position)
298+
y (int): Y coordinate in screenshot space (or None to use last position)
299+
"""
220300
# Get last cursor position for this app_list hash
221301
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
222302
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -228,6 +308,13 @@ async def move_cursor(self, x, y):
228308
await self._send_cmd("move_cursor", {"x": sx, "y": sy})
229309

230310
async def drag_to(self, x, y, duration=0.5):
311+
"""Drag the mouse from current position to the specified coordinates.
312+
313+
Args:
314+
x (int): X coordinate in screenshot space (or None to use last position)
315+
y (int): Y coordinate in screenshot space (or None to use last position)
316+
duration (float): Duration of the drag operation in seconds
317+
"""
231318
# Get last cursor position for this app_list hash
232319
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
233320
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -239,18 +326,43 @@ async def drag_to(self, x, y, duration=0.5):
239326
await self._send_cmd("drag_to", {"x": sx, "y": sy, "duration": duration})
240327

241328
async def get_cursor_position(self):
329+
"""Get the current cursor position in screen coordinates.
330+
331+
Returns:
332+
tuple: (x, y) coordinates of the cursor in screen space
333+
"""
242334
return await self._send_cmd("get_cursor_position")
243335

244336
async def type_text(self, text):
337+
"""Type the specified text using the keyboard.
338+
339+
Args:
340+
text (str): The text to type
341+
"""
245342
await self._send_cmd("type_text", {"text": text})
246343

247344
async def press_key(self, key):
345+
"""Press a single key on the keyboard.
346+
347+
Args:
348+
key (str): The key to press
349+
"""
248350
await self._send_cmd("press_key", {"key": key})
249351

250352
async def hotkey(self, keys):
353+
"""Press a combination of keys simultaneously.
354+
355+
Args:
356+
keys (iterable): Keys to press together; converted to a list before being sent
357+
"""
251358
await self._send_cmd("hotkey", {"keys": list(keys)})
252359

253360
async def scroll_up(self, clicks: int = 1):
361+
"""Scroll up at the last cursor position recorded for this app list (defaults to (0, 0)).
362+
363+
Args:
364+
clicks (int): Number of scroll clicks to perform
365+
"""
254366
# Get last cursor position for this app_list hash
255367
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
256368
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -259,6 +371,11 @@ async def scroll_up(self, clicks: int = 1):
259371
await self._send_cmd("scroll_up", {"clicks": clicks, "x": x, "y": y})
260372

261373
async def scroll_down(self, clicks: int = 1):
374+
"""Scroll down at the last cursor position recorded for this app list (defaults to (0, 0)).
375+
376+
Args:
377+
clicks (int): Number of scroll clicks to perform
378+
"""
262379
# Get last cursor position for this app_list hash
263380
app_list_hash = hash(tuple(sorted(self._diorama.app_list)))
264381
last_pos = Diorama._cursor_positions.get(app_list_hash, (0, 0))
@@ -267,6 +384,11 @@ async def scroll_down(self, clicks: int = 1):
267384
await self._send_cmd("scroll_down", {"clicks": clicks, "x": x, "y": y})
268385

269386
async def get_screen_size(self) -> dict[str, int]:
387+
"""Get the size of the screenshot area, taking a screenshot first if the size is not yet known.
388+
389+
Returns:
390+
dict[str, int]: Dictionary with 'width' and 'height' keys
391+
"""
270392
if not self._scene_size:
271393
await self.screenshot()
272394
return { "width": self._scene_size[0], "height": self._scene_size[1] }
@@ -348,6 +470,7 @@ async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, fl
348470
import time
349471

350472
async def main():
473+
"""Main function demonstrating Diorama usage with multiple desktops and mouse tracking."""
351474
desktop1 = Diorama.create_from_apps(["Discord", "Notes"])
352475
desktop2 = Diorama.create_from_apps(["Terminal"])
353476

0 commit comments

Comments
 (0)