
    )jO                       U d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZ ddlZddlZddlmZmZ ddlmZ  ej        e          Zdeez  fd	Z eh d
          Zee         ed<    ej         dej!                  Z" ej#        d          Z$dedefdZ%i a&e
ee
ee	f         f         ed<   da'e(ed<   i Z)e
ee
ee	f         f         ed<   dZ*e(ed<   dZ+i Z,e
ee
ee
ee	f         f         f         ed<   i Z-e
ee(f         ed<   dZ.g dZ/e/d         Z0dZ1i ddddddddddd dd!dd"dd#dd$dd%d&d'd(d)d*d+d*d,d(d-d.d/d.i d0d*d1d2d3d.d4d5d6d7d8d7d9d7d:d;d<d=d>dd?dd@ddAddBd.dCd;dDd5dEdi dFdGdHd;dIddJdKdLdMdNd7dOd7dPd=dQdRdSdRdTddUd7dVd;dWd;dXd;dYdGdZdGi d[d;d\dGd]dGd^d;d_d;d`dadbdGdcdGdddGdedKdfdGdgd5dhd5did5djdGdkdGdldMZ2dmZ3dedefdnZ4doZ5dpZ6dqZ7drZ8dsedefdtZ9ddvede
eef         fdwZ:dsedefdxZ;dsedefdyZ<i dzd{d|d{d}d~ddddddddddddddddddJdddddddddd4ddBddddddddddddddZ=e
eef         ed<   	 ddl>m?Z@  e@            D ]&ZAeAB                                ZCeCreCe=vr
eAjD        e=eC<   'n# eE$ r Y nw xY wdsedee         fdZFdsedefdZGdsedefdZHddsedvedee         fdZIde	fdZJdde	deKdeKdeeK         fdZLde
ee	f         deMedf         deeK         fdZNde
ee	f         deeK         fdZOde
ee	f         deeK         fdZPde
ee	f         de
ee	f         fdZQde
ee
ee	f         f         dede
ee	f         ddfdZRddede
ee
ee	f         f         fdZS	 	 ddsedvedede
ee
ee	f         f         fdZT	 ddedsedvedeeK         fdZUdefdZVde
eeKf         fdZWdedsedeKddfdZXdedsedeeK         fdZYdedseddfdÄZZdeKdeeK         fdńZ[dedeeK         fdǄZ\dedeKdeeK         fdɄZ]dedeeK         fdʄZ^dededefd̈́Z_ddedsedvedeeK         fd΄Z`ddedsedvedeeK         fdτZadedefdЄZbdedefdфZcdedefd҄ZdddedsedvedeeK         fdӄZededefdԄZfdedsedvedeeK         fdՄZgdddd.dddddddל
Zhe
eeKf         ed<   i aie
eeKf         ed<   daje(ed<   dZkdede
eeKf         fd݄Zl	 ddededeeK         fdބZm	 	 ddedsedvedeeeK         ef         fd߄Zn	 	 	 	 	 ddedsedvedeKdz  dedeodz  deKfdZpdedeKfdZqdee
ee	f                  deKfdZrde
ee	f         deKdeKfdZsde
ee	f         deKfdZtdudddee
ee	f                  dedeee
ee	f                           deKfdZudS )zModel metadata, context lengths, and token estimation utilities.

Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
    N)Path)AnyDictListOptionalTuple)urlparse)base_url_host_matchesbase_url_hostname)OPENROUTER_MODELS_URLreturnc                      dD ];} t          j        |           }|r#t           j                            |          r|c S <dS )a  Resolve SSL verify setting for `requests` calls from env vars.

    The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
    by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
    and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
    that a single env var can cover both `requests` and `httpx` callsites
    inside the same process.

    Returns either a filesystem path to a CA bundle, or True to defer to
    the requests default (certifi).
    )HERMES_CA_BUNDLEREQUESTS_CA_BUNDLESSL_CERT_FILET)osgetenvpathisfile)env_varvals     9/home/ubuntu/.hermes/hermes-agent/agent/model_metadata.py_resolve_requests_verifyr      sO     O  i   	27>>#&& 	JJJ4    >I   x-aix.aiz-aiz.aikimi-cnarcee-ai	deep-seek	gmi-cloud	novita-ai
minimax-cn
nvidia-nimmoonshot-cnqwen-portalxiaomi-mimoopencode-zengithub-modelsgoogle-geminiminimax-oauthtencent-cloudgoogle-ai-studiogoglmgminimxaizaizengrokkilokimimimonousqwenarceelocalzhipualiyunclaudecustomgeminigithubgooglenovitanvidiaollamaxiaomialibabaarceeaicopilotminimaxstepfuntencentdeepseekgmicloudkilocodemoonshotnemotronnovitaaiopencodetokenhub	anthropic	dashscope
openroutertencentmaas
qwen-oauthcopilot-acpkimi-codingopencode-goollama-cloudopenai-codexgithub-copilotkimi-coding-cntencent-tokenhub_PROVIDER_PREFIXESzE^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)z100.64.0.0/10modelc                 ,   d| vs|                      d          r| S |                     dd          \  }}|                                                                }|t          v r0t
                              |                                          r| S |S | S )ua  Strip a recognised provider prefix from a model string.

    ``"local:my-model"`` → ``"my-model"``
    ``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
    ``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
    :http   )
startswithsplitstriplowerrd   _OLLAMA_TAG_PATTERNmatch)re   prefixsuffixprefix_lowers       r   _strip_provider_prefixrs   V   s     %5++F33[[a((NFF<<>>''))L)))$$V\\^^44 	LLr   _model_metadata_cache_model_metadata_cache_time_novita_metadata_cache_novita_metadata_cache_timei  _endpoint_model_metadata_cache#_endpoint_model_metadata_cache_timei,  )       i }  i>  i@  r|   zclaude-fable-5@B zclaude-fablezclaude-opus-4-8zclaude-opus-4.8zclaude-opus-4-7zclaude-opus-4.7zclaude-opus-4-6zclaude-sonnet-4-6zclaude-opus-4.6zclaude-sonnet-4.6r@   i@ gpt-5.5i zgpt-5.4-nano gpt-5.4-minigpt-5.4gpt-5.3-codex-sparkr{   zgpt-5.1-chatgpt-5zgpt-4.1i zgpt-4rB   i   zgemma-4rz   gemma4zgemma-4-31bzgemma-3i   gemmai    zdeepseek-v4-prozdeepseek-v4-flashzdeepseek-chatzdeepseek-reasonerrO   llamazqwen3.6-pluszqwen3-coder-pluszqwen3-coderi   r;   
minimax-m3rL      r0   i  z
grok-buildzgrok-code-fastzgrok-2-visionzgrok-4-fasti z	grok-4.20grok-4.3zgrok-4zgrok-3zgrok-2r6   r8   zhy3-previewrS   trinityelephantzQwen/Qwen3.5-397B-A17BzQwen/Qwen3.5-35B-A3Bzdeepseek-ai/DeepSeek-V3.2i   zmoonshotai/Kimi-K2.5zmoonshotai/Kimi-K2.6zmoonshotai/Kimi-K2-ThinkingzMiniMaxAI/MiniMax-M2.5zXiaomiMiMo/MiMo-V2-Flashzmimo-v2-prozmimo-v2.5-proz	mimo-v2.5zmimo-v2-omnizmimo-v2-flashzzai-org/GLM-5)zgrok-3-minizgrok-4.20-multi-agentr   c                     | pd                                                                 sdS dD ]"}|v r                    |d          d         #t          fdt          D                       S )a$  Return True when an xAI Grok model accepts ``reasoning.effort``.

    Allowlist by substring (matches both bare ``grok-3-mini`` and
    aggregator-prefixed ``x-ai/grok-3-mini``). Conservative by design:
    if a future Grok model isn't listed, we send no effort dial rather
    than 400.
     F)/ri   c              3   B   K   | ]}                     |          V  d S N)rj   ).0rp   names     r   	<genexpr>z1grok_supports_reasoning_effort.<locals>.<genexpr>&  s/      SS6tv&&SSSSSSr   )rl   rm   rsplitany_GROK_EFFORT_CAPABLE_PREFIXES)re   sepr   s     @r   grok_supports_reasoning_effortr     s     KR  &&((D u + +$;;;;sA&&r*DSSSS5RSSSSSSr   )context_lengthcontext_windowcontext_sizemax_context_lengthmax_position_embeddingsmax_model_lenmax_input_tokensmax_sequence_lengthmax_seq_lenn_ctx_trainn_ctxctx_size)max_completion_tokensmax_output_tokens
max_tokens)	localhostz	127.0.0.1z::1z0.0.0.0)z.docker.internalz.containers.internalz.lima.internalbase_urlc                 T    | pd                                                     d          S )Nr   r   )rl   rstripr   s    r   _normalize_base_urlr   H  s&    N!!##**3///r   r   api_keyc                 ^    t          | pd                                          }|si S dd| iS )Nr   AuthorizationBearer )strrl   )r   tokens     r   _auth_headersr   L  s@    2$$&&E 	.u..//r   c                 "    t          | d          S )Nopenrouter.ai)r
   r   s    r   _is_openrouter_base_urlr   S  s     ?;;;r   c                 ^    t          |           }t          |          ot          |           S r   )r   boolr   )r   
normalizeds     r   _is_custom_endpointr   W  s.    $X..J
G$;J$G$G GGr   zapi.openai.comopenaizchatgpt.comapi.anthropic.comrW   zapi.z.air4   zopen.bigmodel.cnzapi.moonshot.air]   zapi.moonshot.cnrb   zapi.kimi.comzapi.stepfun.airM   zapi.stepfun.comzapi.arcee.air<   zapi.minimaxzdashscope.aliyuncs.comrI   zdashscope-intl.aliyuncs.comzportal.qwen.air[   r   rY   z!generativelanguage.googleapis.comr:   rK   	fireworksr^   r3   rF   rH   r1   rE   rc   r_   )zinference-api.nousresearch.comzapi.deepseek.comzapi.githubcopilot.comzmodels.github.aizmodels.inference.ai.azure.comzapi.fireworks.aizopencode.aizapi.x.aizintegrate.api.nvidia.comzapi.xiaomimimo.comzxiaomimimo.comzapi.gmi-serving.comapi.novita.aiztokenhub.tencentmaas.comz
ollama.com_URL_TO_PROVIDER)list_providersc                    t          |           }|sdS t          d|v r|nd|           }|j                                        p|j                                        }t
                                          D ]\  }}||v r|c S dS )a  Infer the models.dev provider name from a base URL.

    This allows context length resolution via models.dev for custom endpoints
    like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
    explicitly set the provider name in config.
    N://zhttps://)r   r	   netlocrm   r   r   items)r   r   parsedhosturl_partproviders         r   _infer_provider_from_urlr     s     %X..J tEZ$7$7jj=T
=T=TUUF=  7FK$5$5$7$7D.4466  (tOOO 4r   c                 $    t          |           d uS r   )r   r   s    r   _is_known_provider_base_urlr     s    #H--T99r   c                    t          |           }|sdS d|v r|nd| }	 t          |          }|j        pdn# t          $ r Y dS w xY wt          v rdS t          fdt          D                       rdS rdvrdS 	 t          j                  }|j	        s|j
        s|j        rdS t          |t          j                  r|t          v rdS n# t          $ r Y nw xY w                    d          }t#          |          dk    r	 t%          |d	                   t%          |d
                   }}|dk    rdS |dk    rd|cxk    rdk    rn ndS |dk    r|dk    rdS |dk    rd|cxk    rdk    rn ndS n# t          $ r Y nw xY wdS )a  Return True if base_url points to a local machine.

    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
    container-internal DNS names (``host.docker.internal`` et al.),
    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
    is included so remote-but-trusted Ollama boxes reached over a
    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
    Fr   zhttp://r   Tc              3   B   K   | ]}                     |          V  d S r   )endswith)r   rq   r   s     r   r   z$is_local_endpoint.<locals>.<genexpr>  s/      
I
IV4==  
I
I
I
I
I
Ir   .   r   ri   
                  d   @      )r   r	   hostname	Exception_LOCAL_HOSTSr   _CONTAINER_LOCAL_SUFFIXES	ipaddress
ip_address
is_privateis_loopbackis_link_local
isinstanceIPv4Address_TAILSCALE_CGNAT
ValueErrorrk   lenint)	r   r   urlr   addrpartsfirstsecondr   s	           @r   is_local_endpointr     s5    %X..J u++**1G:1G1GC#$"   uu|t

I
I
I
I/H
I
I
III t  4t#D))? 	d. 	$2D 	4dI122 	t?O7O7O4    JJsOOE
5zzQ	aMM3uQx==6E{{t||f 2 2 2 2 2 2 2 2 2t||#t||f 3 3 3 3 3 3 3 3 3t 	 	 	D	5sK   : 
AA)C ,#C 
CC0E< =E< E< #E< <
F	F	c                    ddl }t          |           }|}|                    d          r
|dd         }t          |          }	 |                    d|          5 }	 |                    | d          }|j        dk    r	 ddd           d	S n# t          $ r Y nw xY w	 |                    | d
          }|j        dk    r8	 |                                }d|v r	 ddd           dS n# t          $ r Y nw xY wn# t          $ r Y nw xY w	 |                    | d          }|j        dk    r|                    | d          }|j        dk    rd|j	        v r	 ddd           dS n# t          $ r Y nw xY w	 |                    | d          }|j        dk    r&|                                }d|v r	 ddd           dS n# t          $ r Y nw xY wddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )zDetect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
    r   N/v1g       @timeoutheaders/api/v1/models   	lm-studioz	/api/tagsmodelsrG   	/v1/props/propsdefault_generation_settingsllamacppz/versionversionvllm)
httpxr   r   r   Clientgetstatus_coder   jsontext)	r   r   r  r   
server_urlr   clientrdatas	            r   detect_local_server_typer    s`   
 LLL$X..JJ5!! %_
G$$G*\\#w\77 '	6JJ*<<<===C''&'	 '	 '	 '	 '	 '	 '	 '	 (   

JJ*77788=C'' vvxx#t++#+#'	 '	 '	 '	 '	 '	 '	 '	  ,$      JJ*77788=C''

j#8#8#899A=C'',IQV,S,S%9'	 '	 '	 '	 '	 '	 '	 '	:    JJ*66677=C''6688D D((%K'	 '	 '	 '	 '	 '	 '	 '	L    M'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	P     4s  G/ G##BG/ G#
BG#BG#!#C=C,G/ +C=,
C96C=8C99C=<G#=
D
G#	D

G#AE,G/ +G#,
E96G#8E99G#=;G9G/ G#
GG#GG#G/ #G''G/ *G'+G/ /
G<;G<valuec              #      K   t          | t                    r2| V  |                                 D ]}t          |          E d {V  d S t          | t                    r| D ]}t          |          E d {V  d S d S r   )r   dictvalues_iter_nested_dictslist)r  nesteditems      r   r  r    s      % 0llnn 	2 	2F)&1111111111	2 	2	E4	 	  0 	0 	0D)$//////////0 0	0 	0r      逖 minimummaximumc                 $   	 t          | t                    rd S t          | t                    r(|                                                     dd          } t          |           }n# t          t          f$ r Y d S w xY w||cxk    r|k    rn n|S d S )N,r   )r   r   r   rl   replacer   	TypeErrorr   )r  r  r  results       r   _coerce_reasonable_intr  %  s    eT"" 	4eS!! 	3KKMM))#r22EUz"   tt&####G#####4s   A& AA& &A;:A;payloadkeys.c                     d |D             }t          |           D ]W}|                                D ]@\  }}t          |                                          |vr)t	          |          }||c c S AXd S )Nc                 6    h | ]}|                                 S  rm   )r   keys     r   	<setcomp>z%_extract_first_int.<locals>.<setcomp>4  s     ***cciikk***r   )r  r   r   rm   r  )r  r  keysetmappingr$  r  coerceds          r   _extract_first_intr)  3  s    **T***F%g..  !--// 	 	JC3xx~~v--,U33G" #		 4r   c                 ,    t          | t                    S r   )r)  _CONTEXT_LENGTH_KEYSr  s    r   _extract_context_lengthr-  ?      g';<<<r   c                 ,    t          | t                    S r   )r)  _MAX_COMPLETION_KEYSr,  s    r   _extract_max_completion_tokensr1  C  r.  r   c                 X  	 |                      d          }|                      d          }||Ri }|%t          t          |          dz  dz            |d<   |%t          t          |          dz  dz            |d<   |S ddd	d
dd}t          |           D ]}d |                                D             	t          	fd|                                D                       sNi }|                                D ]%\  }}|D ]}|	v r	|         dvr	|         ||<    n&|r|c S i S )Ninput_token_price_per_moutput_token_price_per_mi'  r}   prompt
completion)r5  inputinput_cost_per_tokenprompt_token_cost)r6  outputoutput_cost_per_tokencompletion_token_cost)requestrequest_cost)
cache_readcached_promptinput_cache_readcache_read_cost_per_token)cache_writecache_creationinput_cache_writecache_write_cost_per_token)r5  r6  r=  r?  rC  c                 X    i | ]'\  }}t          |                                          |(S r"  )r   rm   )r   r$  r  s      r   
<dictcomp>z$_extract_pricing.<locals>.<dictcomp>Z  s.    PPP*#uc#hhnn&&PPPr   c              3   N   K   | ]}t          fd |D                       V   dS )c              3       K   | ]}|v V  	d S r   r"  )r   aliasr   s     r   r   z-_extract_pricing.<locals>.<genexpr>.<genexpr>[  s(      @@uu
*@@@@@@r   N)r   )r   aliasesr   s     r   r   z#_extract_pricing.<locals>.<genexpr>[  s@      ccW3@@@@@@@@@ccccccr   >   Nr   )r  r   floatr  r   r   r  )
r  novita_inputnovita_outputpricing	alias_mapr'  targetrL  rK  r   s
            @r   _extract_pricingrS  G  s   ;;899LKK :;;M=#<"$# #E,$7$7&$@9$L M MGH$$'m(<(<v(E	(Q$R$RGL! S`.fk I &g..  PPPPP
ccccPYP`P`PbPbccccc 	"$(00 	 	OFG   J&&:e+<J+N+N&0&7GFOE 	NNN	Ir   cachemodel_identryc                     || |<   d|v r4|                     dd          d         }|                     ||           d S d S )Nr   ri   )rk   
setdefault)rT  rU  rV  
bare_models       r   _add_model_aliasesrZ  h  sO    E(O
h^^C++A.
U+++++ r   Fforce_refreshc                    | s2t           r+t          j                    t          z
  t          k     rt           S 	 t	          j        t          dt                                }|                                 |	                                }i }|                    dg           D ]}|                    dd          }|                    dd          |                    di                               d	d
          |                    d|          |                    di           d}t          |||           |                    dd          }|r||k    rt          |||           |a t          j                    at                              dt          |                     |S # t          $ r0}t                              d|            t           pi cY d}~S d}~ww xY w)z9Fetch model metadata from OpenRouter (cached for 1 hour).r   )r   verifyr
  idr   r   r{   top_providerr   i   r   rP  )r   r   r   rP  canonical_slugz.Fetched metadata for %s models from OpenRouterz0Failed to fetch model metadata from OpenRouter: N)rt   timeru   _MODEL_CACHE_TTLrequestsr  r   r   raise_for_statusr  rZ  loggerdebugr   r   warning)	r[  responser
  rT  re   rU  rV  	canonicales	            r   fetch_model_metadatark  o  s     %2 %	F`8`dt7t7t$$+< 5rJbJdJdeee!!###}}XXfb)) 	< 	<Eyyr**H"')),<f"E"E).>2)F)F)J)JKbdh)i)i		&(33 99Y33	 E uh666		"2B77I <Y(22"5)U;;; %%)Y[["Es5zzRRR + + +M!MMNNN$*******+s   E*F! !
G+%GGGc                 v   t          |           }|rt          |          ri S |sXt                              |          }t                              |d          }|!t          j                    |z
  t          k     r|S |g}|                    d          r|dd                             d          }n|dz   }|r||vr|	                    |           |rdd| ini }d}	t          |          r	 t          ||          d	k    rP|                    d          r|dd                             d          n|}
t          j        |
                    d          d
z   |dt                                }|                                 |                                }i }|                    dg           D ]y}t!          |t"                    s|                    d          p|                    d          }|sFd|                    d|          i}d}|                    dg           pg D ]y}t!          |t"                    s|                    di           }t!          |t"                    r|                    d          nd}t!          |t$                    r
|dk    r|} nz|||d<   t'          |          }|||d<   t)          |          }|r||d<   t+          |||           |                    d          }t!          |t,                    r|r||k    rt+          |||           {|t          |<   t          j                    t          |<   |S n# t.          $ r}|}	Y d}~nd}~ww xY w|D ]}|                    d          dz   }	 t          j        ||dt                                }|                                 |                                }i }|                    dg           D ]}t!          |t"                    s|                    d          }|s0d|                    d|          i}t1          |          }|||d<   t'          |          }|||d<   t)          |          }|r||d<   t+          |||           t3          d |                    dg           D                       }|r	 |                    d                              dd          }t                      }t          j        |dz   |d|          }|j        st          j        |dz   |d|          }|j        rh|                                }|                    di           } |                     d          }!|                    dd          }"|!r|"r|"|v r|!||"         d<   n# t.          $ r Y nw xY w|t          |<   t          j                    t          |<   |c S # t.          $ r}|}	Y d}~{d}~ww xY w|	rt8                              d ||	           i t          |<   t          j                    t          |<   i S )!zFetch model metadata from an OpenAI-compatible ``/models`` endpoint.

    This is used for explicit custom endpoints where hardcoded global model-name
    defaults are unreliable. Results are cached in memory per base URL.
    r   Nr   r   r   r   r   r   r   r   r   r   r   r]  r   r$  r^  r   loaded_instancesconfigr   r   rP  z/modelsr
  c              3   r   K   | ]2}t          |t                    |                    d           dk    V  3dS )owned_byr   N)r   r  r  )r   ms     r   r   z0fetch_endpoint_model_metadata.<locals>.<genexpr>   sV        Jq$4G4Gj!!Z/     r   r   r      r   r   r   model_aliasz1Failed to fetch model metadata from %s/models: %s)r   r   rx   r  ry   ra  _ENDPOINT_MODEL_CACHE_TTLr   r   appendr   r  rc  r   rd  r  r   r  r   r1  rS  rZ  r   r   r-  r   r  okre  rf  )#r   r   r[  r   cached	cached_at
candidates	alternater   
last_errorr  rh  r  rT  re   rU  rV  r   instcfgctxr   rP  alt_idexc	candidater   is_llamacppbase_verify
props_resppropsgen_settingsr   ru  s#                                      r   fetch_endpoint_model_metadatar    s#    %X..J 0<< 	 /33J??7;;JJJ	49;;#:>W"W"WMJ5!! 'ssO**3//		&	 %Yj00)$$$8?G 3' 3 344RG&*J$$ 21	'
GDDDSS<F<O<OPU<V<VfZ_33C888\f
#<%%c**-==#355	   ))+++"--//35$[[266 A AE%eT22 ! $yy//B599T??H# ! -3UYYvx5P5P,QE%)N %		*<b A A GR " ")$55 %$"hhx44;Ec4;P;PZcgg&6777VZ%c3// "C!GG-0N!E%12@./,J5,Q,Q),89N56.u55G 3+2i(&uh>>>"YYt__F!&#.. A6 Af>P>P*5&%@@@=B.z:BF)++3J?] T^  	 	 	JJJJJJ	   4 4	s##i/2	|C"MeMgMghhhH%%'''mmooG/1E VR00 ; ;!%..  99T?? )/681L1L(M!8!?!?!-.<E*+(Fu(M(M%(45JE12*511 /'.E)$"5(E::::    VR00    K  $++C0088CCD688G!)d[.@'[\el!m!m!mJ%= o%-\$/7\]fm%n%n%n
!} I * 1 1',yy1NPR'S'S , 0 0 9 9&+iir&B&B  I[ I[E5I5ICHE+./?@    D :?*:6>Bikk/
;LLL 	 	 	JJJJJJ	  bH*V`aaa13":.6:ikk'
3IsW   1I%M 
M."M))M.D/W?C#V#"W#
V0-W/V00)W
W3&W..W3c                    t          ||          }|                    |           }|sht          |          dk    r/t          t	          |                                                    }n&|                                D ]\  }}| |v s|| v r|} n|r,|                    d          }t          |t                    r|S dS )zDResolve context length from an endpoint's live ``/models`` metadata.rm  ri   r   N)	r  r  r   nextiterr  r   r   r   )re   r   r   endpoint_metadatamatchedr$  rV  r   s           r    _resolve_endpoint_context_lengthr  #  s     6hPPP##E**G  !!Q&&4 1 8 8 : :;;<<GG/5577  
UC<<3%<<#GE $0  " %566nc** 	"!!4r   c                  (    ddl m}   |             dz  S )z8Return path to the persistent context length cache file.r   get_hermes_homezcontext_length_cache.yaml)hermes_constantsr  r  s    r   _get_context_cache_pathr  :  s(    000000?:::r   c                  ^   t                      } |                                 si S 	 t          | d          5 }t          j        |          pi }ddd           n# 1 swxY w Y   |                    di           S # t          $ r'}t                              d|           i cY d}~S d}~ww xY w)z:Load the model+provider -> context_length cache from disk.utf-8encodingNcontext_lengthsz'Failed to load context length cache: %s)	r  existsopenyaml	safe_loadr  r   re  rf  )r   fr
  rj  s       r   _load_context_cacher  @  s    "$$D;;== 	$))) 	+Q>!$$*D	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+xx)2...   >BBB						s@   A; AA; AA; !A"A; ;
B,B'!B,'B,lengthc                    |  d| }t                      }|                    |          |k    rdS |||<   t                      }	 |j                            dd           t          |dd          5 }t          j        d|i|d	
           ddd           n# 1 swxY w Y   t          	                    d||d           dS # t          $ r&}t                              d|           Y d}~dS d}~ww xY w)zPersist a discovered context length for a model+provider combo.

    Cache key is ``model@base_url`` so the same model name served from
    different providers can have different limits.
    @NTparentsexist_okwr  r  r  Fdefault_flow_stylez%Cached context length %s -> %s tokensr  z'Failed to save context length cache: %s)r  r  r  parentmkdirr  r  dumpre  infor   rf  )re   r   r  r$  rT  r   r  rj  s           r   save_context_lengthr  N  sp    

X

C!!Eyy~~E#J"$$DC$666$g... 	O!I(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O;SV--PPPPP C C C>BBBBBBBBBCs<   .C 3BC BC  B!!C 
C4C//C4c                 V    |  d| }t                      }|                    |          S )zBLook up a previously discovered context length for model+provider.r  )r  r  )re   r   r$  rT  s       r   get_cached_context_lengthr  c  s0    

X

C!!E99S>>r   c                    |  d| }t                      }||vrdS ||= t                      }	 |j                            dd           t	          |dd          5 }t          j        d|i|d	
           ddd           dS # 1 swxY w Y   dS # t          $ r'}t          	                    d||           Y d}~dS d}~ww xY w)zCDrop a stale cache entry so it gets re-resolved on the next lookup.r  NTr  r  r  r  r  Fr  z6Failed to invalidate context length cache entry %s: %s)
r  r  r  r  r  r  r  r   re  rf  )re   r   r$  rT  r   r  rj  s          r   !_invalidate_cached_context_lengthr  j  sV   

X

C!!E
%c
"$$DW$666$g... 	O!I(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O W W WMsTUVVVVVVVVVWs;   .B B6B BB 
BB 
CB<<Ccurrent_lengthc                 .    t           D ]}|| k     r|c S dS )z@Return the next lower probe tier, or None if already at minimum.N)CONTEXT_PROBE_TIERS)r  tiers     r   get_next_probe_tierr  z  s/    #  .  KKK !4r   	error_msgc                     |                                  }g d}|D ]O}t          j        ||          }|r6t          |                    d                    }d|cxk    rdk    rn K|c S PdS )a?  Try to extract the actual context limit from an API error message.

    Many providers include the limit in their error text, e.g.:
      - "maximum context length is 32768 tokens"
      - "context_length_exceeded: 131072"
      - "Maximum context size 32768 exceeded"
      - "model's max context length is 65536"
    )zY(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})z:context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})z)(\d{4,})\s*(?:token)?\s*(?:context|limit)z">\s*(\d{4,})\s*(?:max|limit|token)z(\d{4,})\s*(?:max(?:imum)?)\bri   r  r  Nrm   researchr   group)r  error_lowerpatternspatternro   limits         r   parse_context_limit_from_errorr    s     //##K  H   	';// 	A''Eu****
*****4r   current_context_lengthc                 <    t          |           }|dS ||k     r|S dS )u  Return a provider-reported lower context limit, if one is present.

    Context-overflow recovery must not invent a new model window size.  Some
    providers only say that the input exceeds the context window without
    reporting the actual maximum.  In that case callers should keep the
    configured context length and try compression only, rather than stepping
    down through guessed probe tiers (1M → 256K → 128K → ...).
    N)r  )r  r  parsed_limits      r   &get_context_length_from_provider_errorr    s3     2)<<Lt,,,4r   c                 T   |                                  }d|v od|v pd|v pd|v od|v pd|v od|v od|v }|sdS g d	}|D ]E}t          j        ||          }|r,t          |                    d
                    }|d
k    r|c S Ft          j        d|          }t          j        d|          }|rr|rpt          |                    d
                    t          |                    d
                    z
  t          |                    d                    z
  }	|	d
k    r|	S t          j        d|          }
t          j        d|          }|
rY|rWt          |
                    d
                    }t          |                    d
                    dz   dz  }||z
  }	|	d
k    r|	S dS )ul  Detect an "output cap too large" error and return how many output tokens are available.

    Background — two distinct context errors exist:
      1. "Prompt too long"  — the INPUT itself exceeds the context window.
           Fix: compress history, and only reduce context_length if the
           provider explicitly reports the actual lower limit.
      2. "max_tokens too large" — input is fine, but input + requested_output > window.
           Fix: reduce max_tokens (the output cap) for this call.
           Do NOT touch context_length — the window hasn't shrunk.

    Anthropic's API returns errors like:
      "max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"

    Returns the number of output tokens that would fit (e.g. 10000 above), or None if
    the error does not look like a max_tokens-too-large error.
    r   available_tokenszavailable tokenszin the outputzmaximum context length	requestedzoutput tokensN)zavailable_tokens[:\s]+(\d+)zavailable\s+tokens[:\s]+(\d+)z=\s*(\d+)\s*$ri   zmaximum context length is (\d+)zK\((\d+)\s+of text input,\s*(\d+)\s+of tool input,\s*(\d+)\s+in the output\)   z'maximum context length is (\d+)\s*tokenz!prompt contains (\d+)\s*character   r  )r  r  is_output_cap_errorr  r  ro   tokens_m_ctx_m_parts
_available
_m_ctx_tok_m_chars_ctx
_est_inputs                 r   (parse_available_output_tokens_from_errorr    s   " //##K 	# 	U;.S2D2S
 	;& 	4$3 	!K/ 	+;&	+{*! $  t  H   	';// 	Q((F{{ Y9;GGFyV H  ( a))Cq0A0A,B,BBSXYIZIZE[E[[
?? E{SSJy={KKH h :##A&&''(..++,,q0Q6
J&
??4r   candidate_idlookup_modelc                 b    | |k    rdS d| v r"|                      dd          d         |k    rdS dS )a  Return True if *candidate_id* (from server) matches *lookup_model* (configured).

    Supports two forms:
    - Exact match:  "nvidia-nemotron-super-49b-v1" == "nvidia-nemotron-super-49b-v1"
    - Slug match:   "nvidia/nvidia-nemotron-super-49b-v1" matches "nvidia-nemotron-super-49b-v1"
                    (the part after the last "/" equals lookup_model)

    This covers LM Studio's native API which stores models as "publisher/slug"
    while users typically configure only the slug after the "local:" prefix.
    Tr   ri   F)r   )r  r  s     r   _model_id_matchesr  	  sH     |##t
l|223::1=MMt5r   c                 @   ddl }t          |           }|                    d          }|                    d          r
|dd         }	 t	          ||          }n# t
          $ r Y dS w xY w|dk    rdS t          |          }	 |                    d|	          5 }|                    | d
d|i          }	|	j	        dk    r	 ddd           dS |	
                                }
|
                    dd          }d|v r|                    d          D ]s}d|v rm|                                                                }t          |          dk    r4	 t          |d                   c cddd           S # t           $ r Y ow xY wt|
                    di           }|                                D ]B\  }}d|v r9t%          |t          t&          f          rt          |          c cddd           S C	 ddd           n# 1 swxY w Y   n# t
          $ r Y nw xY wdS )ay  Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
    or the explicit ``num_ctx`` from the Modelfile if set.  Returns None if
    the server is unreachable or not Ollama.

    This is the value that should be passed as ``num_ctx`` in Ollama chat
    requests to override the default 2048.
    r   Nr   r   r   rm  rG         @r   	/api/showr   r  r   
parametersr   num_ctx
r  r   
model_infor   )r  rs   r   r   r  r   r   r  postr  r  r  rk   rl   r   r   r   r   r   rM  )re   r   r   r  rY  r  server_typer   r  respr
  paramsliner   r  r$  r  s                    r   query_ollama_num_ctxr    s    LLL'..J%%J5!! %_
.xIII   tthtG$$G\\#w\77 	&6;;*777vz>R;SSD3&&	& 	& 	& 	& 	& 	& 	& 	& 99;;D XXlB//FF"""LL.. % %D D(( $

 2 2 4 4u::??%'*59~~ 5 5	& 	& 	& 	& 	& 	& 	& 	& $. % % % $% ,33J(..00 & &
U#s**z%#u/N/N*u::%%-	& 	& 	& 	& 	& 	& 	& 	&(&)	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&.    4s   	A 
A)(A)H )HH BHE7(H*H 7
FHFA#H'H 4H6H HH 	H
H 
HHc                    ddl }|                    d          }|                    d          r
|dd         }t          |          }	 |                    d|          5 }|                    | dd	| i
          }|j        dk    r	 ddd           dS |                                }|                    di           }	|		                                D ]J\  }
}d|
v rAt          |t          t          f          r%t          |          }|dk    r|c cddd           S K|                    dd          }d|v r|                    d          D ]|}d|v rv|                                                                }t          |          dk    r=	 t          |d                   }|dk    r|c cddd           S l# t           $ r Y xw xY w}ddd           n# 1 swxY w Y   n# t"          $ r Y nw xY wdS )u  Query an Ollama server's native ``/api/show`` for context length.

    Provider-agnostic: works against ANY Ollama-compatible server regardless
    of hostname — local Ollama, Ollama Cloud (``ollama.com``), custom Ollama
    hosting behind a reverse proxy, etc.  For non-Ollama servers the POST
    returns 404/405 quickly; the function handles errors gracefully.

    For hosted servers the GGUF ``model_info.*.context_length`` is the
    authoritative source: the user can't set their own ``num_ctx``, and the
    OpenAI-compat ``/v1/models`` endpoint correctly omits ``context_length``
    per the OpenAI schema.

    Resolution order for hosted Ollama:
      1. ``model_info.*.context_length`` — GGUF training max (authoritative)
      2. ``parameters`` → ``num_ctx`` — server-side Modelfile override
    The order is flipped vs ``query_ollama_num_ctx()`` because local users
    control ``num_ctx`` themselves; hosted users can't.
    r   Nr   r   r   g      @r   r  r   r  r   r  r   r  r  r   r  r  r  r   )r  r   r   r   r  r  r  r  r  r   r   r   rM  rk   rl   r   r   r   )re   r   r   r  r  r   r  r  r
  r  r$  r  r  r  r  r   s                   r   _query_ollama_api_showr  S  s   & LLL%%J5!! %_
G$$G\\#w\77 	%6;;*777vuo;NND3&&	% 	% 	% 	% 	% 	% 	% 	% 99;;D ,33J(..00 # #
U#s**z%#u/N/N*e**Cd{{"

	% 	% 	% 	% 	% 	% 	% 	%  XXlB//FF"""LL.. 	% 	%D D(( $

 2 2 4 4u::??%&)%)nn#&$;;+.JJ3	% 	% 	% 	% 	% 	% 	% 	%0 $/#- % % % $%5	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	%8    4s   	G#  )G	G# A;GG# A/GF:*G,G# 9G:
GGGGG# GG# GG# #
G0/G0c                 \    |                                  }|                    d          pd|v S )aB  Return True if the model name looks like a Kimi-family model.

    Catches ``kimi-k2.6``, ``kimi-k2.5``, ``kimi-k2-thinking``,
    ``moonshotai/Kimi-K2.6``, and similar variants.  Used as a guard
    against stale OpenRouter metadata that underreports these models
    as 32K context when they actually support 262K+.
    r8   rR   )rm   rj   )re   rm   s     r   _model_name_suggests_kimir    s/     KKMMEF##:zU'::r   c                 .    d|                                  v S )a  Return True if the model name looks like MiniMax M3.

    Catches ``MiniMax-M3``, ``minimax/minimax-m3``, and similar variants
    across surfaces (native MiniMax-M3, OpenRouter/Nous minimax/minimax-m3).
    Used as a guard against stale cache entries seeded by pre-catalog builds
    that resolved M3 via the generic ``minimax`` catch-all (204,800) before
    the ``minimax-m3`` (1M) entry existed in DEFAULT_CONTEXT_LENGTHS.
    r   r#  re   s    r   _model_name_suggests_minimax_m3r    s     5;;==((r   c                 .    d|                                  v S )az  Return True if the model name looks like a Grok 4.3 variant.

    Catches ``grok-4.3``, ``grok-4.3-latest``, and similar slugs.
    Used as a guard against stale cache entries seeded by pre-catalog builds
    that resolved grok-4.3 via the generic ``grok-4`` catch-all (256,000)
    before the ``grok-4.3`` (1M) entry was added to DEFAULT_CONTEXT_LENGTHS
    on 2026-05-15.
    r   r#  r  s    r   _model_name_suggests_grok_4_3r    s     &&r   c                 	   ddl }t          |           } |                    d          }|                    d          r
|dd         }t	          |          }	 t          ||          }n# t          $ r d}Y nw xY w	 |                    d|          5 }|d	k    rL|                    | d
d| i          }|j	        dk    r$|
                                }	|	                    dd          }
d|
v r|
                    d          D ]s}d|v rm|                                                                }t          |          dk    r4	 t          |d                   c cddd           S # t           $ r Y ow xY wt|	                    di           }|                                D ]B\  }}d|v r9t%          |t          t&          f          rt          |          c cddd           S C|dk    r|                    | d          }|j	        dk    r|
                                }	|	                    dg           D ]}t)          |                    dd          |           s$t)          |                    dd          |           r|                    dg           D ]j}|                    di           }|                    d          }|r;t%          |t          t&          f          rt          |          c c cddd           S k n|                    | d|            }|j	        dk    r|
                                }	|	                    d          p)|	                    d          p|	                    d          }|r7t%          |t          t&          f          rt          |          cddd           S |                    | d           }|j	        dk    r|
                                }	|	                    d!g           }|D ]}t)          |                    dd          |           rz|                    d          p)|                    d          p|                    d          }|r9t%          |t          t&          f          rt          |          c cddd           S ddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )"z4Query a local server for the model's context length.r   Nr   r   r   rm  r  r   rG   r  r   r  r   r  r   r  r  r  r   r  r   r   r   r   r$  r^  ro  rp  z/v1/models/r   r   z
/v1/modelsr
  )r  rs   r   r   r   r  r   r  r  r  r  r  rk   rl   r   r   r   r   r   rM  r  )re   r   r   r  r  r   r  r  r  r
  r  r  r   r  r$  r  rs  r~  r  r  models_lists                        r   _query_local_context_lengthr    s#   LLL #5))E %%J5!! %_
G$$G.xIII   E\\#w\77 B	,6h&&{{j#;#;#;65/{RR#s**99;;D "XXlB77F F**$*LL$6$6 - -D(D00(,

(:(:(<(<#&u::??%-/259~~(=(='B	, B	, B	, B	, B	, B	, B	, B	,( ,6 %- %- %-(,%- "&,!;!;J&0&6&6&8&8 . .
U+s22z%#u7V7V2#&u::--5B	, B	, B	, B	, B	, B	, B	, B	,B k))zzZ"?"?"?@@#s**99;;D!XXh33 " ",QUU5"-=-=uEE "IZ[\[`[`aegi[j[jlqIrIr "().@"(E(E 4 4&*hhx&<&<&)gg.>&?&?#& !4:cC<+H+H !4+.s88OOOOYB	, B	, B	, B	, B	, B	, B	, B	,Z "E" ::????@@D3&&yy{{hh//g488<L3M3MgQUQYQYZfQgQg $:cC<88 $s88mB	, B	, B	, B	, B	, B	, B	, B	,t ::77788D3&&yy{{"hhvr22$ , ,A(tR%@@ ,eeO44f>N8O8OfSTSXSXYeSfSf ,:cC<#@#@ ,#&s88OOEB	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	, B	,F     4s   A* *A98A9=S B2R7E*R7S *
E74R76E77A#R7S 'DR79S B(R7.S ;C"R7S *R7+S 7R;;S >R;?S 
SSc                 .    |                      dd          S )zNormalize version separators for matching.

    Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
    OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
    Normalize both to dashes for comparison.
    r   -)r  r  s    r   _normalize_model_versionr    s     ==c"""r   c                 r   |r|                     d          rdS 	 |                    d          }|                    d          r
|dd         }| d}|dd}t          j        ||d	t                      
          }|j        dk    rdS |                                }|                    dg           D ]O}|                    d          | k    r4|                    d          }	t          |	t                    r
|	dk    r|	c S Pn2# t          $ r%}
t                              d|
           Y d}
~
nd}
~
ww xY wdS )zQuery Anthropic's /v1/models endpoint for context length.

    Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
    OAuth tokens (sk-ant-oat*) from Claude Code return 401.
    z
sk-ant-oatNr   r   r   z/v1/models?limit=1000z
2023-06-01)z	x-api-keyzanthropic-versionr   rn  r   r
  r^  r   r   z%Anthropic /v1/models query failed: %s)rj   r   r   rc  r  r   r  r  r   r   r   re  rf  )re   r   r   r  r   r   r  r
  rs  r  rj  s              r   _query_anthropic_context_lengthr    sz     g((66 tAs##== 	9D,,, !-
 
 |C"E]E_E_```s""4yy{{&"%% 	 	AuuT{{e##ee.//c3'' C!GGJJJ		
  A A A<a@@@@@@@@A4s%   A-D 
A8D D 
D4D//D4i& )
zgpt-5.1-codex-maxzgpt-5.1-codex-minizgpt-5.3-codexr   zgpt-5.2-codexr   r~   r   zgpt-5.2r   _CODEX_OAUTH_CONTEXT_FALLBACK_codex_oauth_context_cacheg        _codex_oauth_context_cache_timeaccess_tokenc                 0   t          j                     }t          r|t          z
  t          k     rt          S 	 t	          j        ddd|  idt                                }|j        dk    r"t          	                    d|j                   i S |
                                }n4# t          $ r'}t          	                    d|           i cY d	}~S d	}~ww xY wt          |t                    r|                    d
g           ng }i }|D ]}t          |t                    s|                    d          }|                    d          }	t          |t                    r2t          |	t                    r|	dk    r|	||                                <   |r|a|a|S )ak  Probe the ChatGPT Codex /models endpoint for per-slug context windows.

    Codex OAuth imposes its own context limits that differ from the direct
    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
    `context_window` field in each model entry is the authoritative source.

    Returns a ``{slug: context_window}`` dict. Empty on failure.
    zAhttps://chatgpt.com/backend-api/codex/models?client_version=1.0.0r   r   r   rn  r   zHCodex /models probe returned HTTP %s; falling back to hardcoded defaultszCodex /models probe failed: %sNr   slugr   r   )ra  r  r  _CODEX_OAUTH_CONTEXT_CACHE_TTLrc  r  r   r  re  rf  r  r   r   r  r   r   rl   )
r  nowr  r
  r  entriesr  r  r  r  s
             r   "_fetch_codex_oauth_context_lengthsr  [  s    )++C"*114RRR))|O$&>&>&>?+--	
 
 
 s""LLZ    Iyy{{   5s;;;						 )34(>(>Fdhhx$$$BGF ' '$%% 	xxhh'((dC   	'ZS%9%9 	'cAgg#&F4::<<  .%+"*-'Ms$   AB! B! !
C+CCCc                    t          |                                           }|sdS |ret          |          }||v r||         S |                                }|                                D ]!\  }}|                                |k    r|c S "|                                }t          t                                          d d          D ]\  }}||v r|c S dS )zResolve a Codex OAuth model's real context window.

    Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
    have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
    Nc                 ,    t          | d                   S Nr   r   xs    r   <lambda>z5_resolve_codex_oauth_context_length.<locals>.<lambda>  s    S1YY r   Tr$  reverse)rs   rl   r  rm   r   sortedr  )re   r  
model_barelivemodel_lowerr  r  s          r   #_resolve_codex_oauth_context_lengthr    s    (..4466J t 1,??
## &&(( 	 	ID#zz||{**


 + ""$$K%++--3F3FPT    	c ;JJJ  4r   c                 8   |rt          | ||          }||dfS t                      }dt          dt          dt          t
                   fd}| |v r || ||                    }||dfS t          |                                           }|                                D ]\  }}	d	|v r|	                    d	d
          d
         n|}
|
                                |                                 k    s%t          |
                                          |k    r |||	          }||dfc S |                                 }|                                D ]\  }}	d	|v r|	                    d	d
          d
         n|}
|
                                |ft          |
                                          |ffD ]g\  }}|
                    |          rMt          |          t          |          k    s|t          |                   dv r |||	          }||dfc c S hdS )uM  Resolve Nous Portal model context length.

    Tries the live Nous inference endpoint first (authoritative), then falls
    back to OpenRouter metadata with suffix/version matching.

    Nous model IDs are bare after prefix-stripping (e.g. 'qwen3.6-plus',
    'claude-opus-4-6') while OpenRouter uses prefixed IDs (e.g.
    'qwen/qwen3.6-plus', 'anthropic/claude-opus-4.6').  Version
    normalization (dot↔dash) is applied to handle name drifts.

    Returns ``(context_length, source)`` where ``source`` is one of:
      - ``"portal"``    — live /v1/models response (authoritative)
      - ``"openrouter"`` — OpenRouter cache fallback (non-authoritative;
        callers must NOT persist this to the on-disk cache or a single
        portal blip will freeze the wrong value in forever)
      - ``""``           — could not resolve
    rm  Nportalor_idrV  r   c                     |                     d          }|d S |dk    r-t          |           rt                              d||            d S |S )Nr      z{Rejecting OpenRouter metadata context=%s for %r (Kimi-family underreport, Nous path); falling through to hardcoded defaults)r  r  re  r  )r  rV  r  s      r   	_safe_ctxz/_resolve_nous_context_length.<locals>._safe_ctx  sa    ii());4%<<5e<<<KK^U  
 4
r   rY   r   ri   z-:.)Nr   )r  rk  r   r  r   r   r  rm   r   rk   rj   r   )re   r   r   
portal_ctxmetadatar  r  r   r  rV  barer  r  querys                 r   _resolve_nous_context_lengthr    sf   4  (5eXwWWW
!x''#%%H T hsm     ix//?$$)%006688J (( ) )u),u{{3""1%%5::<<5;;==((,DT,J,J,P,P,R,RV`,`,`)E5))CL((((++--K (( - -u),u{{3""1%%5"&**,,!<?WX\?]?]?c?c?e?egq>r s 	- 	-Iu##E** -I#e**,,	#e**0E0N0Niu--?,,,,,,	- 8r   config_context_lengthr   custom_providersc                 P   |t          |t                    r|dk    r|S |r.|r,| r*	 ddlm}  || ||          }|r|S n# t          $ r Y nw xY wt          |           } |rX|dk    rQt          | |          }|>|dk    r7|dk    r1t                              d| ||d	           t          | |           n|d
k    r?t          |           r0t                              d| ||d	           t          | |           n|dk    r?t          |           r0t                              d| ||d	           t          | |           nw|dk    r?t          |           r0t                              d| ||d	           t          | |           n2t          |          dk    rt                              d| |           n|S |dk    s4|rTt          |                              d          r2t#          |d          r"	 ddlm}	  |	|           S # t(          $ r Y nw xY w|dk    s|r;t#          |d          r+t+          | |pd|          }
|
|rt-          | ||
           |
S t/          |          r4t1          |          s$t+          | ||          }||S t1          |          st3          | ||          }
|
t-          | ||
           |
S t5          |          r3t7          | ||          }|r|dk    r|dk    rt-          | ||           |S t                              d| |t8          d	           |                                 }t=          t>                                           d d          D ],\  }}||v r#t                              d|d	| |           |c S -t8          S |dk    s|r*t          |          d k    rtC          | |pd!|          }
|
r|
S |}|r|d"v r|rt          |          }|r|}|d#v r)	 dd$l"m#}  || |          }
|
r|
S n# t          $ r Y nw xY w|dk    r6tI          | |pd%|pd%&          \  }
}|
r|r|d'k    rt-          | ||
           |
S |dk    r*tK          | |pd%(          }|r|rt-          | ||           |S |d)k    r|rt+          | ||          }
|
|
S |r't3          | ||          }
|
t-          | ||
           |
S |d*k    rltM                      }|'                    |           }|rG|'                    d+          }t          |t                    r|dk    r|d
k    rt          |           s|S |rhdd,l(m)}  |||           }
|
rTt          |           rCt>          '                    d-          }|r'|
|k     r!t                              d.|
| |d	           |}
|
S |sgtM                      }| |v rU||          '                    d+t8                    }|d
k    r,t          |           rt                              d/||            n|S |                                 }t=          t>                                           d0 d          D ]\  }}||v r|c S |rBt5          |          r3t7          | ||          }|r|dk    r|dk    rt-          | ||           |S t8          S )1a  Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing).  Nous URLs
       bypass the cache here so step 5b can always reconcile against
       the authoritative portal /v1/models response.
    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
    5. Provider-aware lookups (before generic OpenRouter cache):
       a. Copilot live /models API
       b. Nous: live /v1/models probe first (authoritative), then OR
          cache fallback with suffix/version normalisation.  Only
          portal-derived values are persisted to disk.
       c. Codex OAuth /models probe
       d. GMI /models endpoint
       e. Ollama native /api/show probe (any base_url, provider-agnostic)
       f. models.dev registry lookup (with :cloud/-cloud suffix fallback)
    6. OpenRouter live API metadata (Kimi-family 32k guard)
    7. Hardcoded defaults (broad family patterns, longest-key-first)
    8. Local server query (last resort)
    9. Default fallback (256K)Nr   )"get_custom_provider_context_length)re   r   r  lmstudior`   r   zaDropping stale Codex cache entry %s@%s -> %s (pre-fix value); re-resolving via live /models prober  r  ziDropping stale Kimi cache entry %s@%s -> %s (OpenRouter underreport); re-resolving via hardcoded defaultsr   zjDropping stale MiniMax-M3 cache entry %s@%s -> %s (pre-catalog value); re-resolving via hardcoded defaultsrz   zhDropping stale Grok-4.3 cache entry %s@%s -> %s (pre-catalog value); re-resolving via hardcoded defaultsr:   z@Bypassing persistent cache for %s@%s (Nous portal authoritative)bedrockzbedrock-runtime.zamazonaws.com)get_bedrock_context_lengthrE   r   zhttps://api.novita.ai/openai/v1rm  u   Could not detect context length for model %r at %s — defaulting to %s tokens (probe-down). Set model.context_length in config.yaml to override.c                 ,    t          | d                   S r  r  r  s    r   r	  z*get_model_context_length.<locals>.<lambda>  s    c!A$ii r   Tr
  zUUsing hardcoded context length %s for model %r (custom endpoint, catalog match on %r)rW   r   zhttps://api.anthropic.com>   rA   rY   >   rK   r\   ra   )get_copilot_model_contextr   )r   r   r  )r  r1   rY   r   )lookup_models_dev_contextr   z[Rejecting models.dev context=%s for %r (MiniMax-M3 underreport); using hardcoded default %szpRejecting OpenRouter metadata context=%s for %r (Kimi-family underreport); falling through to hardcoded defaultsc                 ,    t          | d                   S r  r  r  s    r   r	  z*get_model_context_length.<locals>.<lambda>X  s    s1Q4yy r   )*r   r   hermes_cli.configr  r   rs   r  re  r  r  r  r  r  r   rf  r   rj   r
   agent.bedrock_adapterr"  ImportErrorr  r  r   r   r  r   r  DEFAULT_FALLBACK_CONTEXTrm   r  DEFAULT_CONTEXT_LENGTHSr   r  hermes_cli.modelsr$  r  r  rk  r  agent.models_devr%  )re   r   r   r  r   r  r  cp_ctxry  r"  r  r   	local_ctxr  default_modelr  effective_providerinferredr$  source	codex_ctxr  rV  or_ctxr%  catalogs                             r   get_model_context_lengthr7    s	   B (Z8Ms-S-S(XmpqXqXq$$  H  
	LLLLLL77!!1  F
   	 	 	D	 #5))E  AH
***5(;; >))f.?.?:8]]  
 2%BBBB5%>u%E%E:8]]  
 2%BBBB 7""'Fu'M'M":8]]  
 2%BBBB 7""'DU'K'K":8]]  
 2%BBBB *(33v==V8     9 h''223EFF  "(O<< 
	HHHHHH--e444 	 	 	D	 8-B8_-]-].uh6cBcmtuuu? :#E8S999J 8$$ -,-H-R-R -,9%SZ[[[%!!*844 )	, )'JJJC#E8S999
 ** %7xQXYYY	 %Q:--+E8YGGG$$KK. x$<!@!@	    ++--K)/'--//''* * * " "%v
 !K//KKA!um  
 "MMM 0 ,+ ; &x004GGG-eX5\A\^eff 	J " .!37O!O!O 	./99H .%-" III	CCCCCC++E7CCCC 
 	 	 	D	 V##2HNGMr
 
 
V  		  :Fh..#E8S999J^++ 8GMWYZZZ	 	 @#E8Y???U""x" /uhPPP?J  $UHgFFF?x555J \))'))U## 	YY/00F &#&& 6A::%$=e$D$D >>>>>>''(:EBB 	 /u55 "155lCC "sW}}KKOUwNN  
 "CJ  '))He_(()9;STTF#<U#C#CWE     ++--K!'%%''-@-@$" " "  v K''MMM (  %h// /xQQQ	 	Q:%%#E8Y??? $#s5   A   
AA5H 
HH1P	 	
PPr  c                 4    | sdS t          |           dz   dz  S )a  Rough token estimate (~4 chars/token) for pre-flight checks.

    Uses ceiling division so short texts (1-3 chars) never estimate as
    0 tokens, which would cause the compressor and pre-flight checks to
    systematically undercount when many short tool results are present.
    r   r  r   r  )r  s    r   estimate_tokens_roughr9  i  s&      qIIMar   messagesc                 x    d}d}d}| D ]'}|t          |          z  }|t          ||          z  }(|dz   dz  |z   S )ue  Rough token estimate for a message list (pre-flight only).

    Image parts (base64 PNG/JPEG) are counted as a flat ~1500 tokens per
    image — the Anthropic pricing model — instead of counting raw base64
    character length. Without this, a single ~1MB screenshot would be
    estimated at ~250K tokens and trigger premature context compression.
    i  r   r  r   )_estimate_message_chars_count_image_tokens)r:  _IMAGE_TOKEN_COSTtotal_charsimage_tokensmsgs        r   estimate_messages_tokens_roughrB  u  sd     KL D D.s333+C1BCCC1_"l22r   rA  cost_per_imagec                 
   d}t          | t                    r|                     d          nd}t          |t                    r9|D ]6}t          |t                    s|                    d          }|dv r|dz  }7t          | t                    r|                     d          nd}t          |t                    r8|D ]5}t          |t                    r|                    d          dk    r|dz  }6t          |t                    ru|                    d	          r`|                    d          }t          |t                    r6|D ]3}t          |t                    r|                    d          d
v r|dz  }4||z  S )zECount image-like content parts in a message; return their token cost.r   contentNtype>   image	image_urlinput_imageri   _anthropic_content_blocksrG  _multimodal>   rG  rH  )r   r  r  r  )rA  rC  countrE  partptypestashedinners           r   r=  r=    s   E$.sD$9$9Ccggi   tG'4    	 	DdD)) HHV$$E===
6@d6K6KUcgg1222QUG'4    	 	D$%% $((6*:*:g*E*E
'4   W[[%?%? I&&eT"" 	  dD)) dhhv.>.>BX.X.XQJE>!!r   c                    t          | t                    st          t          |                     S i }|                                 D ]\  }}|dk    r|dk    rt          |t
                    rg }|D ]}t          |t                    rY|                    d          dv r,|                    |                    d          dd           Z|                    |           p|                    |           |||<   t          |t                    r/|                    d          r|                    dd	          ||<   |||<   |||<   
t          t          |                    S )
zChar count for token estimation, excluding base64 image data.

    Base64 images are counted via `_count_image_tokens` instead; including
    their raw chars here would massively overestimate token usage.
    rJ  rE  rF  >   rG  rH  rI  z
[stripped])rF  rG  rK  text_summaryr   )r   r  r   r   r   r  r  rw  )rA  shadowkvcleanedrM  s         r   r<  r<    sv    c4   3s88}}F		  1+++	>>!T""  - -D!$-- -88F++/TTT#NNDHHV4D4D|+\+\]]]]#NN40000t,,,,#q		At$$ })=)= EE."55q		q		F1IIs6{{r   )system_prompttoolsrW  rX  c                    d}|r|t          |          dz   dz  z  }| r|t          |           z  }|r%|t          t          |                    dz   dz  z  }|S )u  Rough token estimate for a full chat-completions request.

    Includes the major payload buckets Hermes sends to providers:
    system prompt, conversation messages, and tool schemas.  With 50+
    tools enabled, schemas alone can add 20-30K tokens — a significant
    blind spot when only counting messages. Image content is counted
    at a flat per-image cost (see estimate_messages_tokens_rough).
    r   r  r   )r   rB  r   )r:  rW  rX  totals       r   estimate_request_tokens_roughr[    su     E /#m$$q(Q.. :/999 ,#c%jj//A%!++Lr   )r   )r  r  )F)r   F)r   r   )r   r   Nr   N)v__doc__r   loggingr   r  ra  pathlibr   typingr   r   r   r   r   urllib.parser	   rc  r  utilsr
   r   r  r   	getLogger__name__re  r   r   r   	frozensetrd   __annotations__compile
IGNORECASErn   IPv4Networkr   rs   rt   ru   rM  rv   rw   rb  rx   ry   rv  r  r*  MINIMUM_CONTEXT_LENGTHr+  r   r   r+  r0  r   r   r   r   r   r   r   	providersr   _list_providers_ppget_hostname_hostr   r   r   r   r   r  r  r   r  tupler)  r-  r1  rS  rZ  rk  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r7  r9  rB  r=  r<  r[  r"  r   r   <module>rp     s          				 				        3 3 3 3 3 3 3 3 3 3 3 3 3 3 ! ! ! ! ! !   : : : : : : : : 2 2 2 2 2 2		8	$	$$*    * &/Y 0 0 0 & & IcN   4 !bjLM   )9(99 # #    & 46 tCc3h/0 5 5 5$% E % % %46 S$sCx.01 6 6 6%& U & & & GI S$sDcN/B*C%C D I I I8: #T#u*%5 : : :     /q1 
   v
 gv Gv wv wv wv wv wv v wv v  f!v. w/v0 F1v2 F3v4 w5vB 6CvD FEv vF VGvH wIvJ VKvN gOvR vSvT fUvV 6WvX vYvZ T[vj ykvl mvn Yovp qvr svv Vwv| G}v~ v v v@ 6AvB FCvN 'OvP vQvT 
6Uvb &cvd fevf Tgvh 7ivj kvl mvn fovp fqvr fsvt Fuvx Fyv@ 6Av v vD EvH vIvL MvP fQvR FSvT  UvV FWvX FYvZ "6[v\ f]v^ _v` 7avb Wcvd evf Fgvh Vivj Vkv v J! T# T$ T T T T$   < 0# 0# 0 0 0 00 03 0S#X 0 0 0 0<c <d < < < <H# H$ H H H H
&$h&$8&$ &$ 	&$
 &$ }&$ '&$ M&$ i&$ y&$ G&$ 9&$ i&$ "9&$ l&$  \!&$" (#&$$ '-"&! &/#  ("  2 K&$ &$ &$ $sCx. & & &T	;;;;;;   / /  "" 	/U"222&)hU#/  	 	 	D	s x}    $:# :$ : : : :5 5 5 5 5 5p: :s :S :(3- : : : :z0c 0 0 0 0 #  S ZbcfZg    	S#X 	eCHo 	(SV- 	 	 	 	=T#s(^ = = = = ==DcN =x} = = = =d38n c3h    B,d3S#X#67 ,3 ,tTWY\T\~ ,bf , , , ,!+ !+ !+c4S>>Q9R !+ !+ !+ !+L M MMM M 
#tCH~
	M M M Mf    c]	   .; ; ; ; ;T#s(^    Cs Cc C3 C4 C C C C*S C HSM    WS WC WD W W W W      c hsm    8 c]   (T T T T T TnC s t    &4 4 4s 4S 4(SV- 4 4 4 4n: :# : :s :HUXM : : : :z	;S 	;T 	; 	; 	; 	;	)3 	)4 	) 	) 	) 	)	' 	' 	' 	' 	' 	'[ [s [c [C [QYZ]Q^ [ [ [ [|#C #C # # # #3 #  PXY\P]    P !! #1 1 tCH~   & .0 DcN / / /),  , , ,!% 0S 0T#s(^ 0 0 0 0h %' !c]   F G GGG G 8C=#	G G G GX (,$(m$ m$m$m$ m$ :	m$
 m$ Tkm$ 	m$ m$ m$ m$`	  	  	  	  	  	 3T$sCx.-A 3c 3 3 3 3""T#s(^ "S "S " " " "4c3h C    H ,0	  4S>"  Dc3h()	
 	     s   7J? ?KK