
    oq'jpR                       d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZmZmZmZmZ  ej        e          Z eh d          ZdZd                    d	 eD                       Z ej        d
ez   dz   ej                  Z ej        dez   dz   ej                  Zd3dZ eh d          Z eh d          Zd4dZd5dZ d6dZ!d7d Z"	 d8d9d!Z#d:d"Z$d;d%Z%d8d<d)Z&d=d*Z'	 d8d>d1Z(g d2Z)dS )?u  Routing helpers for inbound user-attached images.

Two modes:

  native  — attach images as OpenAI-style ``image_url`` content parts on the
            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
            OpenAI chat.completions) already translate these into their
            vendor-specific multimodal formats.

  text    — run ``vision_analyze`` on each image up-front and prepend the
            description to the user's text. The model never sees the pixels;
            it only sees a lossy text summary. This is the pre-existing
            behaviour and still the right choice for non-vision models.

The decision is made once per message turn by :func:`decide_image_input_mode`.
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
| ``text``, default ``auto``) and the active model's capability metadata.

In ``auto`` mode:
  - If the user has explicitly configured ``auxiliary.vision.provider``
    (i.e. not ``auto`` and not empty), we assume they want the text pipeline
    regardless of the main model — they've opted in to a specific vision
    backend for a reason (cost, quality, local-only, etc.).
  - Otherwise, if the active model reports ``supports_vision=True`` in its
    models.dev metadata, we attach natively.
  - Otherwise (non-vision model, no explicit override), we fall back to text.

This keeps ``vision_analyze`` surfaced as a tool in every session — skills
and agent flows that chain it (browser screenshots, deeper inspection of
URL-referenced images, style-gating loops) keep working. The routing only
affects *how user-attached images on the current turn* are presented to the
main model.
    )annotationsN)Path)AnyDictListOptionalTuple>   autotextnative)	.png.jpg.jpeg.gif.webp.bmpz.tiffz.tifz.heic|c              #  @   K   | ]}|                     d           V  dS ).N)lstrip).0es     8/home/ubuntu/.hermes/hermes-agent/agent/image_routing.py	<genexpr>r   ;   s,      AAahhsmmAAAAAA    z/(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:z)\bzhttps?://[^\s<>\"']+?\.(?:z)(?:\?[^\s<>\"']*)?r   strreturnTuple[List[str], List[str]]c                   t          | t                    r| sg g fS g t          j        d| t          j                  D ]=}                    |                                |                                f           >t          j        d|           D ]=}                    |                                |                                f           >d
fd}g }t                      }t                              |           D ]} ||                                          r |
                    d          }t          j                            |          }	 t          j                            |          sun# t          $ r Y w xY w||v r|                    |           |                    |           g }t                      }	t"                              |           D ]y} ||                                          r |
                    d          }
|
                    d	          }
|
|	v rO|	                    |
           |                    |
           z||fS )u  Scan free-form text for image references the model should see.

    Returns ``(local_paths, urls)``:

      * ``local_paths`` — absolute (``/``) or home-relative (``~/``) paths
        whose suffix is an image extension AND whose expanded form exists
        on disk as a file. Order-preserving, deduplicated.
      * ``urls`` — ``http(s)://…`` URLs whose path ends in an image
        extension (a ``?query`` is allowed after the extension).
        Order-preserving, deduplicated.

    Matches inside fenced code blocks (``` ``` ```) and inline backticks
    (`` `…` ``) are skipped so that snippets pasted into a task body for
    reference aren't mistaken for live attachments. This mirrors the
    behaviour of ``gateway.platforms.base.BaseAdapter.extract_local_files``.

    Local paths are validated against the filesystem; URLs are not
    (the provider fetches them at request time).
    z```[^\n]*\n.*?```z	`[^`\n]+`posintr   boolc                <     t           fdD                       S )Nc              3  >   K   | ]\  }}|cxk    o|k     nc V  d S N )r   sr   r    s      r   r   z7extract_image_refs.<locals>._in_code.<locals>.<genexpr>n   s;      77DAq1<<<<a<<<<777777r   )any)r    
code_spanss   `r   _in_codez$extract_image_refs.<locals>._in_codem   s'    7777J777777r   r   z	.,;:!?)]>)r    r!   r   r"   )
isinstancer   refinditerDOTALLappendstartendset_LOCAL_IMAGE_PATH_REgroupospath
expanduserisfileOSErroradd_IMAGE_URL_RErstrip)r   mr*   local_paths
seen_pathsmatchrawexpandedurls	seen_urlsurlr)   s              @r   extract_image_refsrF   N   ss   ( dC    2v )+J[-tRY?? 0 017799aeegg.////[t,, 0 017799aeegg.////8 8 8 8 8 8  K55J%..t44 % %8EKKMM"" 	kk!nn7%%c**	7>>(++  	 	 	H	 z!!x   8$$$$D%%I''-- 
 
8EKKMM"" 	kk!nn jj%%)cCs   E66
FF>   1onyestrue>   0noofffalserA   r   Optional[bool]c                *   t          | t                    r| S t          | t                    r| dv rt          |           S dS t          | t                    r<|                                                                 }|t          v rdS |t          v rdS dS )z@Return True/False for recognised boolean values, None otherwise.)r      NTF)r+   r"   r!   r   striplower_TRUE_TOKENS_FALSE_TOKENS)rA   r'   s     r   _coerce_capability_boolrV      s    #t 
#s &==99t#s IIKK454r   cfgOptional[Dict[str, Any]]providermodelc                   t          | t                    sdS |                     d          }t          |t                    r|ni }t          |                    d                    }||S t	          |                    d          pd                                          }|                     d          }t          |t                    r|ni }t                              t          d||f                    D ]}	|                    |	          }
t          |
t                    r|
ni }|                    d          }t          |t                    r|ni }|                    |          }t          |t                    r|ni }t          |                    d                    }||c S |                     d          }t          |t                    rjt                      }t          d||f          D ]o}	|
                    |	           |	                    d	          r+|
                    |	t          d	          d                    W|
                    d	|	            p|D ]}
t          |
t                    st	          |
                    d
          pd                                          }||vrS|
                    d          }t          |t                    r|ni }|                    |          }t          |t                    r|ni }t          |                    d                    }||c S dS )u`  Resolve user-declared vision capability from config.yaml.

    Resolution order, first hit wins:
      1. ``model.supports_vision`` (top-level shortcut for the active model)
      2. ``providers.<provider>.models.<model>.supports_vision``
         (named custom providers — ``provider`` may be the runtime-resolved
         value ``"custom"`` and/or the user-declared name under
         ``model.provider``; both are tried)

    Returns None when no override is set, so the caller falls through to
    models.dev. Returns False explicitly only when the user wrote a
    recognised boolean false token.
    NrZ   supports_visionrY    	providersmodelscustom_providerszcustom:name)r+   dictgetrV   r   rR   fromkeysfilterlistr2   r:   
startswithlen)rW   rY   rZ   model_cfg_raw	model_cfgtopconfig_providerproviders_rawproviders_cfgp	entry_rawentry
models_raw
models_cfgper_model_raw	per_modelcoercedr`   candidate_names
entry_names                       r   _supports_vision_overridery      s7   $ c4   t GGG$$M1;M41P1P XVXI
!)--0A"B"B
C
CC

 )--
339r::@@BBOGGK((M5?t5T5T$\MMZ\M]]6$?(CDDEE 	 	!%%a((	-7	4-H-H P		bYYx((
3=j$3O3O%WZZUW
"u--5?t5T5T$\MMZ\	))--8I*J*JKKNNN  ww122"D))   #uux9:: 	3 	3A"""||I&& 3##Ac)nnoo$67777##MaMM2222) 	 	Ii.. Y]]6228b99??AAJ00"x00J'1*d'C'CKJ&NN511M)3M4)H)HPbI-imm<M.N.NOOG" # 4r   c                    t          | t                    sdS |                                                                 }|t          v r|S dS )z5Normalize a config value into one of the valid modes.r
   )r+   r   rR   rS   _VALID_MODES)rA   vals     r   _coerce_moder}      sG    c3 v
))++



C
l
6r   r"   c                h   t          | t                    sdS |                     d          pi }t          |t                    sdS |                    d          pi }t          |t                    sdS t          |                    d          pd                                                                          }t          |                    d          pd                                          }t          |                    d          pd                                          }|dv r|s|sdS d	S )
zTrue when the user configured a specific auxiliary vision backend.

    An explicit override means the user *wants* the text pipeline (they're
    paying for a dedicated vision model), so we don't silently bypass it.
    F	auxiliaryvisionrY   r]   rZ   base_url>   r]   r
   T)r+   rb   rc   r   rR   rS   )rW   auxr   rY   rZ   r   s         r   _explicit_aux_vision_overrider     s     c4   u
''+


$"Cc4   uWWX$"Ffd## u6::j))/R006688>>@@H

7##)r**0022E6::j))/R006688H <hu4r   c                    t          || |          }||S | r|sdS 	 ddlm}  || |          }n5# t          $ r(}t                              d| ||           Y d}~dS d}~ww xY w|dS t          |j                  S )a  Return True/False if we can resolve caps, None if unknown.

    Consults the user's ``supports_vision`` override in config.yaml first
    (so custom/local models declared as vision-capable don't fall through to
    text routing in ``auto`` mode), then falls back to models.dev.
    Nr   )get_model_capabilitiesu2   image_routing: caps lookup failed for %s:%s — %s)ry   agent.models_devr   	Exceptionloggerdebugr"   r\   )rY   rZ   rW   overrider   capsexcs          r   _lookup_supports_visionr   !  s     )h>>H 5 t;;;;;;%%h66   I8UZ\_```ttttt |t$%%%s   0 
A"AA"c                @   d}t          |t                    rN|                    d          pi }t          |t                    r"t          |                    d                    }|dk    rdS |dk    rdS t	          |          rdS t          | ||          }|du rdS dS )a1  Return ``"native"`` or ``"text"`` for the given turn.

    Args:
      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
      model:    active model slug as it would be sent to the provider.
      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
    r
   agentimage_input_moder   r   T)r+   rb   rc   r}   r   r   )rY   rZ   rW   mode_cfg	agent_cfgsupportss         r   decide_image_input_moder   <  s     H#t GGGG$$*	i&& 	G#IMM2D$E$EFFH8x6v %S)) v&x<<H4x6r   bytesOptional[str]c                t   | sdS |                      d          rdS |                      d          rdS | dd         dv rdS t          |           d	k    r| dd
         dk    r| dd	         dk    rdS |                      d          rdS t          |           d	k    r| d
d         dk    r| dd	         dv rdS dS )a  Detect image MIME from magic bytes. Returns None if unrecognised.

    Filename-based detection (``mimetypes.guess_type``) is unreliable when
    upstream platforms lie about content-type. Discord, for example, can
    serve a PNG with ``content_type=image/webp`` for proxied/animated
    stickers, custom emoji previews, or images uploaded via certain bots.
    Anthropic strictly validates that declared media_type matches the
    actual bytes and returns HTTP 400 on mismatch, so we sniff to be safe.
    Ns   PNG

	image/pngs   
image/jpeg   >      GIF87a   GIF89a	image/gif      s   RIFF   s   WEBP
image/webps   BM	image/bmps   ftyp>      heic   heim   heis   heix   hevc   hevx   mif1   msf1z
image/heic)rg   rh   rA   s    r   _sniff_mime_from_bytesr   l  s      t
~~*++ {
~~o&& |
2A2w((({
3xx2~~#bqb'W,,QrTg1E1E|
~~e {
3xx2~~#ac(g--#ad) @ 3 3 |4r   r6   r   Optional[bytes]c                   |t          |          }|r|S t          j        t          |                     \  }}|r|                    d          r|S | j                                        }ddddddd                    |d          S )	zReturn image MIME type for *path*.

    If *raw* bytes are provided, magic-byte sniffing wins (authoritative).
    Otherwise we fall back to ``mimetypes`` then suffix-based defaults.
    Nzimage/r   r   r   r   r   )r   r   r   r   r   r   )r   	mimetypes
guess_typer   rg   suffixrS   rc   )r6   rA   sniffedmime_r   s         r   _guess_mimer     s     (-- 	N"3t99--GD! ))  [  F  
c&, r   c                   	 |                                  }n4# t          $ r'}t                              d| |           Y d}~dS d}~ww xY wt	          | |          }t          j        |                              d          }d| d| S )u!  Encode a local image as a base64 data URL at its native size.

    Size limits are NOT enforced here — the agent retry loop
    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
    provider's first rejection. Keeping this simple means providers that
    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
    quality tax just because one other provider is stricter.

    Returns None only if the file can't be read (missing, permission
    denied, etc.); the caller reports those paths in ``skipped``.
    u'   image_routing: failed to read %s — %sNr   asciizdata:z;base64,)
read_bytesr   r   warningr   base64	b64encodedecode)r6   rA   r   r   b64s        r   _file_to_data_urlr     s    oo   @$LLLttttt t%%%D

3


&
&w
/
/C&4&&&&&s    
AAA	user_textimage_paths	List[str]
image_urlsOptional[List[str]]&Tuple[List[Dict[str, Any]], List[str]]c                   g }g }g }g }|D ]}t          |          }|                                r|                                s#|                    t	          |                     \t          |          }	|	s#|                    t	          |                     |                    dd|	id           |                    t	          |                     |pg D ]J}
|
pd                                }
|
s|                    dd|
id           |                    |
           K| pd                                }|s|r~|pd}g }|                    d |D                        |                    d |D                        | dd	                    |          z   }d
|dg}|                    |           ||fS g }|r|                    d
|d           ||fS )u  Build an OpenAI-style ``content`` list for a user turn.

    Shape:
      [{"type": "text", "text": "...\n\n[Image attached at: /local/path]"},
       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
       {"type": "image_url", "image_url": {"url": "https://example.com/a.png"}},
       ...]

    Local paths are read from disk and embedded as base64 ``data:`` URLs.
    Remote URLs (``http(s)://``) are passed through verbatim — the provider
    fetches them server-side. The model still sees the pixels either way.

    For each successfully attached image, a hint is appended to the text
    part:

      * local path → ``[Image attached at: <path>]``
      * URL        → ``[Image attached: <url>]``

    The hint gives the model a string handle so MCP/skill tools that take
    an image path or URL argument can be invoked on the same image without
    an extra round-trip. This parallels the text-mode hint produced by
    ``Runner._enrich_message_with_vision`` (``vision_analyze using image_url:
    <path>``) so behaviour is consistent across both image input modes.

    Images are attached at their native size. If a provider rejects the
    request because an image is too large (e.g. Anthropic's 5 MB per-image
    ceiling), the agent's retry loop transparently shrinks and retries
    once — see ``run_agent._try_shrink_image_parts_in_messages``.

    Returns (content_parts, skipped). Skipped entries are local paths
    that couldn't be read from disk; URLs are never skipped (they're
    not validated here).
    	image_urlrE   )typer   r]   zWhat do you see in this image?c              3  "   K   | ]
}d | dV  dS )z[Image attached at: ]Nr&   )r   ro   s     r   r   z-build_native_content_parts.<locals>.<genexpr>
  s.      NN!5555NNNNNNr   c              3  "   K   | ]
}d | dV  dS )z[Image attached: r   Nr&   )r   us     r   r   z-build_native_content_parts.<locals>.<genexpr>  s.      JJq2a222JJJJJJr   z


r   )r   r   )	r   existsis_filer/   r   r   rR   extendjoin)r   r   r   skippedimage_partsattached_pathsattached_urlsraw_pathro   data_urlrE   r   	base_text
hint_linescombined_textpartss                   r   build_native_content_partsr     s\   L G(*K "N!M - -NNxxzz 	 	NN3x==)))$Q'' 	NN3x==)))*
 
 	 	 	 	c(mm,,,,R " "yb!! 	
 
 	 	 	 	S!!!!O""$$D   <<	 "
NN~NNNNNNJJMJJJJJJ$***TYYz-B-BB06'N'N&O[!!!g~ E 5fd33444'>r   )r   r   rF   )r   r   r   r   )rA   r   r   rO   )rW   rX   rY   r   rZ   r   r   rO   )rA   r   r   r   )rW   rX   r   r"   r%   )rY   r   rZ   r   rW   rX   r   rO   )rY   r   rZ   r   rW   rX   r   r   )rA   r   r   r   )r6   r   rA   r   r   r   )r6   r   r   r   )r   r   r   r   r   r   r   r   )*__doc__
__future__r   r   loggingr   r5   r,   pathlibr   typingr   r   r   r   r	   	getLogger__name__r   	frozensetr{   _IMAGE_EXTSr   _IMAGE_EXT_PATTERNcompile
IGNORECASEr3   r;   rF   rT   rU   rV   ry   r}   r   r   r   r   r   r   r   __all__r&   r   r   <module>r      s     D # " " " " "       				 				       3 3 3 3 3 3 3 3 3 3 3 3 3 3		8	$	$ y33344 XXAA[AAAAA 
 "rz69KKfTM   
!$669OOM B B B BZ y33344	55566   "K K K K\      8 %)& & & & &6   `       F         4' ' ' '2 '+V V V V Vr  r   