20
20
# Module-level handler instance, created once when this module is imported.
# NOTE(review): presumably the scheduler loop uses it to execute the queued
# mouse/keyboard actions — confirm against the loop body.
automation_handler = MacOSAutomationHandler ()
21
21
22
22
class Diorama :
23
+ """Virtual desktop manager that provides automation capabilities for macOS applications.
24
+
25
+ Manages application windows and provides an interface for taking screenshots,
26
+ mouse interactions, keyboard input, and coordinate transformations between
27
+ screenshot space and screen space.
28
+ """
23
29
_scheduler_queue = None
24
30
_scheduler_task = None
25
31
_loop = None
26
32
_scheduler_started = False
27
33
28
34
@classmethod
def create_from_apps(cls, *args) -> "DioramaComputer":
    """Create a DioramaComputer for the given application names.

    Names may be passed as separate positional arguments
    (``create_from_apps("Discord", "Notes")``) or as a single list/tuple
    (``create_from_apps(["Discord", "Notes"])``). Both spellings produce the
    same flat tuple of names. Without this normalization the second form
    stores a tuple that contains a list, and the click/scroll helpers fail
    when they compute ``hash(tuple(sorted(app_list)))`` because a list is
    unhashable.

    Args:
        *args: Application names, or one list/tuple holding the names.

    Returns:
        DioramaComputer: The ``computer`` attribute of the new instance.
    """
    cls._ensure_scheduler()
    # A lone list/tuple argument is the collection of names, not a single name.
    if len(args) == 1 and isinstance(args[0], (list, tuple)):
        args = tuple(args[0])
    return cls(args).computer
32
46
33
47
# Dictionary to store cursor positions for each unique app_list hash
34
48
_cursor_positions = {}
35
49
36
50
def __init__ (self , app_list ):
51
+ """Initialize a Diorama instance for the specified applications.
52
+
53
+ Args:
54
+ app_list: List of application names to manage
55
+ """
37
56
self .app_list = app_list
38
57
self .interface = self .Interface (self )
39
58
self .computer = DioramaComputer (self )
@@ -48,6 +67,10 @@ def __init__(self, app_list):
48
67
49
68
@classmethod
50
69
def _ensure_scheduler (cls ):
70
+ """Ensure the async scheduler loop is running.
71
+
72
+ Creates and starts the scheduler task if it hasn't been started yet.
73
+ """
51
74
if not cls ._scheduler_started :
52
75
logger .info ("Starting Diorama scheduler loop…" )
53
76
cls ._scheduler_queue = asyncio .Queue ()
@@ -57,6 +80,11 @@ def _ensure_scheduler(cls):
57
80
58
81
@classmethod
59
82
async def _scheduler_loop (cls ):
83
+ """Main scheduler loop that processes automation commands.
84
+
85
+ Continuously processes commands from the scheduler queue, handling
86
+ screenshots, mouse actions, keyboard input, and scrolling operations.
87
+ """
60
88
while True :
61
89
cmd = await cls ._scheduler_queue .get ()
62
90
action = cmd .get ("action" )
@@ -144,13 +172,33 @@ async def _scheduler_loop(cls):
144
172
future .set_exception (e )
145
173
146
174
class Interface ():
175
+ """Interface for interacting with the virtual desktop.
176
+
177
+ Provides methods for taking screenshots, mouse interactions, keyboard input,
178
+ and coordinate transformations between screenshot and screen coordinates.
179
+ """
180
+
147
181
def __init__(self, diorama):
    """Bind this interface to its owning Diorama and reset scene state.

    Args:
        diorama: The parent Diorama instance this interface acts for.
    """
    # Scene metadata starts out empty; screenshot() stores the hitboxes it
    # receives (and presumably the scene size as well — confirm).
    self._scene_size = None
    self._scene_hitboxes = list()

    self._diorama = diorama
152
191
153
192
async def _send_cmd (self , action , arguments = None ):
193
+ """Send a command to the scheduler queue.
194
+
195
+ Args:
196
+ action (str): The action to perform
197
+ arguments (dict, optional): Arguments for the action
198
+
199
+ Returns:
200
+ The result of the command execution
201
+ """
154
202
Diorama ._ensure_scheduler ()
155
203
loop = asyncio .get_event_loop ()
156
204
future = loop .create_future ()
@@ -167,6 +215,14 @@ async def _send_cmd(self, action, arguments=None):
167
215
return None
168
216
169
217
async def screenshot (self , as_bytes : bool = True ) -> Union [str , Image .Image ]:
218
+ """Take a screenshot of the managed applications.
219
+
220
+ Args:
221
+ as_bytes (bool): If True, return base64-encoded bytes; if False, return PIL Image
222
+
223
+ Returns:
224
+ Union[str, Image.Image]: Base64-encoded PNG bytes or PIL Image object
225
+ """
170
226
import base64
171
227
result , img = await self ._send_cmd ("screenshot" )
172
228
self ._scene_hitboxes = result .get ("hitboxes" , [])
@@ -184,6 +240,12 @@ async def screenshot(self, as_bytes: bool = True) -> Union[str, Image.Image]:
184
240
return img
185
241
186
242
async def left_click (self , x , y ):
243
+ """Perform a left mouse click at the specified coordinates.
244
+
245
+ Args:
246
+ x (int): X coordinate in screenshot space (or None to use last position)
247
+ y (int): Y coordinate in screenshot space (or None to use last position)
248
+ """
187
249
# Get last cursor position for this app_list hash
188
250
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
189
251
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -195,6 +257,12 @@ async def left_click(self, x, y):
195
257
await self ._send_cmd ("left_click" , {"x" : sx , "y" : sy })
196
258
197
259
async def right_click (self , x , y ):
260
+ """Perform a right mouse click at the specified coordinates.
261
+
262
+ Args:
263
+ x (int): X coordinate in screenshot space (or None to use last position)
264
+ y (int): Y coordinate in screenshot space (or None to use last position)
265
+ """
198
266
# Get last cursor position for this app_list hash
199
267
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
200
268
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -206,6 +274,12 @@ async def right_click(self, x, y):
206
274
await self ._send_cmd ("right_click" , {"x" : sx , "y" : sy })
207
275
208
276
async def double_click (self , x , y ):
277
+ """Perform a double mouse click at the specified coordinates.
278
+
279
+ Args:
280
+ x (int): X coordinate in screenshot space (or None to use last position)
281
+ y (int): Y coordinate in screenshot space (or None to use last position)
282
+ """
209
283
# Get last cursor position for this app_list hash
210
284
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
211
285
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -217,6 +291,12 @@ async def double_click(self, x, y):
217
291
await self ._send_cmd ("double_click" , {"x" : sx , "y" : sy })
218
292
219
293
async def move_cursor (self , x , y ):
294
+ """Move the mouse cursor to the specified coordinates.
295
+
296
+ Args:
297
+ x (int): X coordinate in screenshot space (or None to use last position)
298
+ y (int): Y coordinate in screenshot space (or None to use last position)
299
+ """
220
300
# Get last cursor position for this app_list hash
221
301
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
222
302
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -228,6 +308,13 @@ async def move_cursor(self, x, y):
228
308
await self ._send_cmd ("move_cursor" , {"x" : sx , "y" : sy })
229
309
230
310
async def drag_to (self , x , y , duration = 0.5 ):
311
+ """Drag the mouse from current position to the specified coordinates.
312
+
313
+ Args:
314
+ x (int): X coordinate in screenshot space (or None to use last position)
315
+ y (int): Y coordinate in screenshot space (or None to use last position)
316
+ duration (float): Duration of the drag operation in seconds
317
+ """
231
318
# Get last cursor position for this app_list hash
232
319
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
233
320
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -239,18 +326,43 @@ async def drag_to(self, x, y, duration=0.5):
239
326
await self ._send_cmd ("drag_to" , {"x" : sx , "y" : sy , "duration" : duration })
240
327
241
328
async def get_cursor_position(self):
    """Ask the scheduler for the cursor position.

    Sends a ``get_cursor_position`` command through ``_send_cmd`` and hands
    back whatever that call returns.

    Returns:
        The scheduler's reply, passed through unchanged (documented upstream
        as an ``(x, y)`` tuple in screen space — not verifiable from here).
    """
    position = await self._send_cmd("get_cursor_position")
    return position
243
335
244
336
async def type_text(self, text):
    """Queue a ``type_text`` command carrying the given text.

    Args:
        text: The text to type; forwarded to ``_send_cmd`` under the
            ``"text"`` key.
    """
    payload = {"text": text}
    await self._send_cmd("type_text", payload)
246
343
247
344
async def press_key(self, key):
    """Queue a ``press_key`` command for a single key.

    Args:
        key: The key to press; forwarded to ``_send_cmd`` under the
            ``"key"`` key.
    """
    payload = {"key": key}
    await self._send_cmd("press_key", payload)
249
351
250
352
async def hotkey(self, keys):
    """Queue a ``hotkey`` command for a key combination.

    Args:
        keys: Iterable of keys making up the combination; it is copied into
            a new list before being forwarded under the ``"keys"`` key.
    """
    combo = [*keys]
    await self._send_cmd("hotkey", {"keys": combo})
252
359
253
360
async def scroll_up (self , clicks : int = 1 ):
361
+ """Scroll up at the current cursor position.
362
+
363
+ Args:
364
+ clicks (int): Number of scroll clicks to perform
365
+ """
254
366
# Get last cursor position for this app_list hash
255
367
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
256
368
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -259,6 +371,11 @@ async def scroll_up(self, clicks: int = 1):
259
371
await self ._send_cmd ("scroll_up" , {"clicks" : clicks , "x" : x , "y" : y })
260
372
261
373
async def scroll_down (self , clicks : int = 1 ):
374
+ """Scroll down at the current cursor position.
375
+
376
+ Args:
377
+ clicks (int): Number of scroll clicks to perform
378
+ """
262
379
# Get last cursor position for this app_list hash
263
380
app_list_hash = hash (tuple (sorted (self ._diorama .app_list )))
264
381
last_pos = Diorama ._cursor_positions .get (app_list_hash , (0 , 0 ))
@@ -267,6 +384,11 @@ async def scroll_down(self, clicks: int = 1):
267
384
await self ._send_cmd ("scroll_down" , {"clicks" : clicks , "x" : x , "y" : y })
268
385
269
386
async def get_screen_size(self) -> dict[str, int]:
    """Report the cached scene size as a width/height mapping.

    If no size has been cached yet, a screenshot is taken first; the cached
    value is then read again.

    Returns:
        dict[str, int]: ``{"width": ..., "height": ...}`` built from the
        first two entries of ``self._scene_size``.
    """
    size = self._scene_size
    if not size:
        # Nothing cached yet: screenshot() is relied on to populate it.
        await self.screenshot()
        size = self._scene_size
    width, height = size[0], size[1]
    return {"width": width, "height": height}
@@ -348,6 +470,7 @@ async def to_screenshot_coordinates(self, x: float, y: float) -> tuple[float, fl
348
470
import time
349
471
350
472
async def main ():
473
+ """Main function demonstrating Diorama usage with multiple desktops and mouse tracking."""
351
474
desktop1 = Diorama .create_from_apps (["Discord" , "Notes" ])
352
475
desktop2 = Diorama .create_from_apps (["Terminal" ])
353
476
0 commit comments