
    Fij&                       U d Z ddlmZ ddlmZmZ dddi ddg d	d
dddg ddddddddddddddddddddddidddd d!dg d"d#dd$ddg d%d&d'd(d)dd*dd+dd,dd-dddiddd.d d/dddiddd0d d1dg d2d3dd4dd5dd6dd7dd8dd9dd:dd;dd<d=dd>d?dd>d@ddAdgdBdCZdDedE<   dIdGZdHS )JuN  Schema for the generic `computer_use` tool.

Model-agnostic. Any tool-calling model can drive this. Vision-capable models
should prefer `capture(mode='som')` then `click(element=N)` — much more
reliable than pixel coordinates. Pixel coordinates remain supported for
models that were trained on them (e.g. Claude's computer-use RL).
    )annotations)AnyDictcomputer_useu  Drive the macOS desktop in the background — screenshots, mouse, keyboard, scroll, drag — without stealing the user's cursor, keyboard focus, or Space. Preferred workflow: call with action='capture' (mode='som' gives numbered element overlays), then click by `element` index for reliability. Pixel coordinates are supported for models trained on them. Works on any window — hidden, minimized, on another Space, or behind another app. macOS only; requires cua-driver to be installed.objectactionstring)captureclickdouble_clickright_clickmiddle_clickdragscrolltypekey	set_valuewait	list_apps	focus_appu
  Which action to perform. `capture` is free (no side effects). All other actions require approval unless auto-approved. Use `set_value` for select/popup elements and sliders — it selects the matching option directly without opening the native menu (no focus steal).)r   enumdescriptionmode)somvisionaxu  Capture mode. `som` (default) is a screenshot with numbered overlays on every interactable element plus the AX tree — best for vision models, lets you click by element index. `vision` is a plain screenshot. `ax` is the accessibility tree only (no image; useful for text-only models).appzOptional. Limit capture/action to a specific app (by name, e.g. 'Safari', or bundle ID, 'com.apple.Safari'). If omitted, operates on the frontmost app's window or the whole screen.)r   r   max_elementsintegeru  Optional cap on the AX `elements` array returned by `action='capture'`. Default 100, hard maximum 1000. Dense UIs (Electron apps such as Obsidian or VS Code, JetBrains IDEs) can publish 500+ AX nodes — capping prevents a single capture from blowing session context. When the cap trims the response, `total_elements` and `truncated_elements` are surfaced in the result so you can re-call with `app=` to narrow scope or raise `max_elements` when the full tree is required. Has no effect on `mode='som'` / `mode='vision'` when a screenshot is included in the response; only the rare image-missing fallback returns an `elements` array and is subject to the cap.d      i  )r   r   defaultminimummaximumelementzoThe 1-based SOM index returned by the last `capture(mode='som')` call. Strongly preferred over raw coordinates.
coordinatearrayr      zPixel coordinates [x, y] in logical screen space (as returned by capture width/height). Only use this if no element index is available.)r   itemsminItemsmaxItemsr   button)leftrightmiddlezMouse button. Defaults to left.	modifiers)cmdshiftoptionaltctrlfn)r   r   z%Modifier keys held during the action.)r   r)   r   from_elementzSource element index (drag).
to_elementzTarget element index (drag).from_coordinatez3Source [x,y] (drag; use when no element available).to_coordinatez3Target [x,y] (drag; use when no element available).	direction)updownr-   r.   zScroll direction.amountzScroll wheel ticks. Default 3.valuezFor action='set_value': the value to set on the element. For AXPopUpButton / select dropdowns, pass the option's display label (e.g. 'Blue'). For sliders and other AXValue-settable elements, pass the numeric or string value.textz+Text to type (respects the current layout).keyszUKey combo, e.g. 'cmd+s', 'ctrl+alt+t', 'return', 'escape', 'tab'. Use '+' to combine.numberzSeconds to wait. Max 30.booleanu   Only for action='focus_app'. If true, brings the window to front (DISRUPTS the user). Default false — input is routed to the app without raising, matching the background co-work model.zIf true, take a follow-up capture after the action and include it in the response. Saves a round-trip when you need to verify an action's effect.)secondsraise_windowcapture_after)r   
propertiesrequired)namer   
parametersDict[str, Any]COMPUTER_USE_SCHEMAreturnc                     t           S )z2Return the generic OpenAI function-calling schema.)rL        >/home/ubuntu/.hermes/hermes-agent/tools/computer_use/schema.pyget_computer_use_schemarR      s    rP   N)rM   rK   )	__doc__
__future__r   typingr   r   rL   __annotations__rR   rO   rP   rQ   <module>rW      sq     # " " " " "         	; o
    H% o
6  ///-	 7o
N  B Oo
` !* ) ao
N !' Oo
^  ),5
 
_o
t  333@ uo
~ $KKK   G o
P Y-KM MQo
T 9+IK KUo
X  ),1T	   Yo
d  ),1T	 eo
r  7772 so
| !? }o
F  S Go
Z  L [o
b  ; co
r !9  "=  "B Oo
 o
 o
` Jes s@' @'  @ @ @ @F     rP   