+
    i4                       a  R tH0 t R t^ RIt^ RIt^ RIt^ RIt^ RIHt ^ RIH	t	H
t
HtHt ^ RIHt ^ RIt^ RIt^ RIHt ]P&                  ! ]4      t]! 0 RkRkRkR	kR
kRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkRkR kR!kR"kR#kR$kR%kR&kR'kR(kR)kR*kR+kR,kR-kR.k4      t] ^ k ]P0                  ! R/]P2                  4      tR0 R1 lt/ s] ^k ^ s] ^k R2t/ t] ^k / t ] ^k R3t!. ROt"]"^ ,          t#/ R5R6bR7R6bR8R6bR9R6bR%R:bR;R<bR=R4bR>R4bRR?bR@RAbRBRAbRCRDbRERFbRR4bRGRDbR.RDbRRHbRRIR#RJRKRJRLRDRMRDRNRORPRJRQRJRRRHRSRTRUR?RVR?RWRI/Ct$Rt%Rt&Rt'RX RY lt(RZ R[ lt)R\ R] lt*/ R^R_bR`R_bRaRbRbRbRcRbRdRbReRbRfRbRgRbRhRbRiRbRjRbRkRbRlR	bRmR	bRnRobt+] ^k Rp Rq lt,Rr Rs lt-Rt Ru lt.Rv Rw lt/Rx Ry lt0RRz R{ llt1R| R} lt2R~ R lt3R R lt4R R lt5R R lt6RR R llt7RR R llt8R R lt9R R lt:R R lt;R R lt<R R lt=R R lt>R R lt?R R lt@R R ltAR R ltBR R ltCRR R lltDR R ltER R ltFRRRR/R R lltGR# )zModel metadata, context lengths, and token estimation utilities.

Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
NPath)AnyDictListOptional)urlparse)OPENROUTER_MODELS_URL
openrouternouszopenai-codexcopilotzcopilot-acpgeminizaizkimi-codingminimaxz
minimax-cn	anthropicdeepseekzopencode-zenzopencode-goz
ai-gatewaykilocodealibabacustomlocalgooglezgoogle-geminizgoogle-ai-studioglmzz-aizz.aizhipugithubzgithub-copilotzgithub-modelskimimoonshotclaudez	deep-seekopencodezengovercelkilo	dashscopealiyunqwenzE^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)c                0    V ^8  d   QhR\         R\         /#    modelreturnstr)formats   "1/home/ubuntu/hermes-agent/agent/model_metadata.py__annotate__r.   ,   s      # #     c                   RV 9  g   V P                  R4      '       d   V # V P                  R^4      w  rVP                  4       P                  4       pV\        9   d.   \
        P                  VP                  4       4      '       d   V # V# V # )uM  Strip a recognised provider prefix from a model string.

``"local:my-model"`` → ``"my-model"``
``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
:http)
startswithsplitstriplower_PROVIDER_PREFIXES_OLLAMA_TAG_PATTERNmatch)r(   prefixsuffixprefix_lowers   &   r-   _strip_provider_prefixr=   ,   sv     %5++F33[[a(NF<<>'')L))$$V\\^44LLr/   i  i,    zclaude-opus-4-6i@B zclaude-sonnet-4-6zclaude-opus-4.6zclaude-sonnet-4.6i@ zgpt-4.1i zgpt-5zgpt-4i   zgemma-4-31bi  zgemma-4-26bzgemma-3i   gemmai    llamai   i  i   trinityzQwen/Qwen3.5-397B-A17BzQwen/Qwen3.5-35B-A3Bzdeepseek-ai/DeepSeek-V3.2i   zmoonshotai/Kimi-K2.5zmoonshotai/Kimi-K2-ThinkingzMiniMaxAI/MiniMax-M2.5zXiaomiMiMo/MiMo-V2-Flashi   zmimo-v2-prozmimo-v2-omnizzai-org/GLM-5c                0    V ^8  d   QhR\         R\         /# r'   base_urlr)   r*   )r,   s   "r-   r.   r.      s     0 0# 0# 0r/   c                 T    T ;'       g    R P                  4       P                  R4      # ) /)r5   rstriprD   s   &r-   _normalize_base_urlrJ      s"    NN!!#**3//r/   c                0    V ^8  d   QhR\         R\        /# rC   r+   bool)r,   s   "r-   r.   r.      s     D Dc Dd Dr/   c                 :    R \        V 4      P                  4       9   # )openrouter.ai)rJ   r6   rI   s   &r-   _is_openrouter_base_urlrP      s    1(;AACCCr/   c                0    V ^8  d   QhR\         R\        /# rC   rL   )r,   s   "r-   r.   r.      s     H H# H$ Hr/   c                 ^    \        V 4      p\        V4      ;'       d    \        V4      '       * # N)rJ   rM   rP   )rD   
normalizeds   & r-   _is_custom_endpointrU      s(    $X.J
GG$;J$G GGr/   zapi.openai.comopenaizchatgpt.comapi.anthropic.comzapi.z.aizapi.moonshot.aizapi.kimi.comzapi.minimaxzdashscope.aliyuncs.comzdashscope-intl.aliyuncs.comrO   z!generativelanguage.googleapis.comzinference-api.nousresearch.comzapi.deepseek.comzapi.githubcopilot.comzmodels.github.aizapi.fireworks.ai	fireworksc                F    V ^8  d   QhR\         R\        \         ,          /# rC   r+   r   )r,   s   "r-   r.   r.      s      s x} r/   c                $   \        V 4      pV'       g   R# \        RV9   d   TMRV 24      pVP                  P                  4       ;'       g    VP                  P                  4       p\
        P                  4        F  w  rEWC9   g   K  Vu # 	  R# )zInfer the models.dev provider name from a base URL.

This allows context length resolution via models.dev for custom endpoints
like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
explicitly set the provider name in config.
N://zhttps://)rJ   r   netlocr6   path_URL_TO_PROVIDERitems)rD   rT   parsedhosturl_partproviders   &     r-   _infer_provider_from_urlre      s{     %X.JEZ$7jx
|=TUF== 77FKK$5$5$7D.446O 7 r/   c                0    V ^8  d   QhR\         R\        /# rC   rL   )r,   s   "r-   r.   r.      s     : :# :$ :r/   c                     \        V 4      R J# rS   )re   rI   s   &r-   _is_known_provider_base_urlrh      s    #H-T99r/   c                0    V ^8  d   QhR\         R\        /# rC   rL   )r,   s   "r-   r.   r.      s     ! ! ! !r/   c                   \        V 4      pV'       g   R# RV9   d   TMRV 2p \        V4      pVP                  ;'       g    RpT\        9   d   R# ^ RIp TP                  T4      pTP                  ;'       g!    TP                  ;'       g    TP                  #   \         d     R# i ; i  \         d     Mi ; iTP                  R4      p\        T4      ^8X  dl    \        T^ ,          4      \        T^,          4      rT^
8X  d   R# T^8X  d   ^T	u;8:  d
   ^8:  d    R#  T^8X  d
   T	^8X  d   R# R#   \         d     R# i ; iR# )zOReturn True if base_url points to a local machine (localhost / RFC-1918 / WSL).Fr\   zhttp://rF   TN.)rJ   r   hostname	Exception_LOCAL_HOSTS	ipaddress
ip_address
is_privateis_loopbackis_link_local
ValueErrorr4   lenint)
rD   rT   urlra   rb   ro   addrpartsfirstseconds
   &         r-   is_local_endpointr|      sB   $X.J+*7:,1GC#$$" |##D)HH$"2"2HHd6H6HH     JJsOE
5zQ		aM3uQx=6{|f 2 2 !3|#   		sS   B B #B/ <B/ B/ B,+B,/B=<B="*D: D: 'D: :E	E	c                F    V ^8  d   QhR\         R\        \         ,          /# rC   rZ   )r,   s   "r-   r.   r.      s     8 8s 8x} 8r/   c                   ^ RI p\        V 4      pTpVP                  R4      '       d   VRR p VP                  RR7      ;_uu_ 4       p VP	                  V R24      pVP
                  ^8X  d    RRR4       R#   VP	                  V R24      pVP
                  ^8X  d%    VP                  4       pRV9   d    RRR4       R	# M  VP	                  V R
24      pVP
                  ^8w  d   VP	                  V R24      pVP
                  ^8X  d   RVP                  9   d    RRR4       R#  TP	                  T R24      pTP
                  ^8X  d#   TP                  4       pRT9   d    RRR4       R# RRR4       R#   \         d     ELi ; i  \         d     Li ; i  \         d     Li ; i  \         d     Li ; i  \         d     L\i ; i  + '       g   i     R# ; i  \         d     R# i ; i)zDetect which local server is running at base_url by probing known endpoints.

Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
N/v1g       @timeout/api/v1/models	lm-studioz	/api/tagsmodelsollama	/v1/props/propsdefault_generation_settingsllamacppz/versionversionvllm)	httpxrJ   endswithClientgetstatus_coderm   jsontext)rD   r   rT   
server_urlclientrdatas   &      r-   detect_local_server_typer      s   
 $X.JJ5!!_
*\\#\&&&JJ*^<===C'& '& (
JJ*Y78==C' vvx#t+#+# '&  , (JJ*Y78==C'

j\#89A==C',IQVV,S%9 '&@JJ*X67==C'668D D(%K '&&V K   %      M '&V   s   G* 	G%F 1G* ;G=%F##F;G* GAF4$G* /<G,G* 6G*  FGFGF F#GF  F##F1.G0F11G4G?GGGGGGGG'	!G* 'G* *G98G9c                $    V ^8  d   QhR\         /# )r'   value)r   )r,   s   "r-   r.   r.   6  s     0 0c 0r/   c              #   
  "   \        V \        4      '       d1   V x  V P                  4        F  p\        V4       R j  xL
  K  	  R # \        V \        4      '       d   V  F  p\        V4       R j  xL
  K  	  R # R #  L@ L5irS   )
isinstancedictvalues_iter_nested_dictslist)r   nesteditems   &  r-   r   r   6  si     %llnF)&111 %	E4	 	 D)$///  
! 2 0s!   <BA?3B2B3BBc          	      ^    V ^8  d   QhR\         R\        R\        R\        \        ,          /# )r'   r   minimummaximumr)   )r   rv   r   )r,   s   "r-   r.   r.   @  s/      #  S ZbcfZg r/   c                     \        V \        4      '       d   R # \        V \        4      '       d!   V P                  4       P	                  RR4      p \        V 4      pYu;8:  d
   T8:  d    T#  R # R #   \        \        3 d     R # i ; i)N,rF   )r   rM   r+   r5   replacerv   	TypeErrorrt   )r   r   r   results   &&& r-   _coerce_reasonable_intr   @  sx    eT""eS!!KKM))#r2EU #G# $	 z" s   A1 AA1 1BBc                    V ^8  d   QhR\         \        \        3,          R\        \        R3,          R\        \
        ,          /# )r'   payloadkeys.r)   )r   r+   r   tupler   rv   )r,   s   "r-   r.   r.   N  s6     	 	S#X 	eCHo 	(SV- 	r/   c                    V Uu0 uF  q"P                  4       kK  	  pp\        V 4       FQ  pVP                  4        F:  w  r%\        V4      P                  4       V9  d   K%  \	        V4      pVf   K6  Vu u # 	  KS  	  R # u upi rS   )r6   r   r`   r+   r   )r   r   keykeysetmappingr   coerceds   &&     r-   _extract_first_intr   N  sp    %)*TciikTF*%g.!--/JC3x~~v-,U3G" * /  +s   Bc                h    V ^8  d   QhR\         \        \        3,          R\        \        ,          /# r'   r   r)   r   r+   r   r   rv   )r,   s   "r-   r.   r.   Z  s&     = =T#s(^ = =r/   c                 "    \        V \        4      # rS   )r   _CONTEXT_LENGTH_KEYSr   s   &r-   _extract_context_lengthr   Z      g';<<r/   c                h    V ^8  d   QhR\         \        \        3,          R\        \        ,          /# r   r   )r,   s   "r-   r.   r.   ^  s&     = =DcN =x} =r/   c                 "    \        V \        4      # rS   )r   _MAX_COMPLETION_KEYSr   s   &r-   _extract_max_completion_tokensr   ^  r   r/   c                t    V ^8  d   QhR\         \        \        3,          R\         \        \        3,          /# r   r   r+   r   )r,   s   "r-   r.   r.   b  s*      d38n c3h r/   c           
      .  a	 R RRR	RR
RRRR/p\        V 4       F  pVP                  4        UUu/ uF  w  r4\        V4      P                  4       VbK!  	  uppo	\        ;QJ d-    V	3R lVP                  4        4       F  '       g   K   RM"	  RM! V	3R lVP                  4        4       4      '       g   K  / pVP                  4        F3  w  rgV F(  pVS	9   g   K  S	V,          R9  g   K  S	V,          WV&    K1  	  K5  	  V'       g   K  Vu # 	  / # u uppi )prompt
completionrequest
cache_readcache_writec              3      <"   T F?  p\         ;QJ d    V3R  lV 4       F  '       g   K   RM	  RM! V3R  lV 4       4      x  KA  	  R# 5i)c              3   ,   <"   T F	  qS9   x  K  	  R # 5irS    ).0aliasrT   s   & r-   	<genexpr>-_extract_pricing.<locals>.<genexpr>.<genexpr>l  s     @u
*s   TFN)any)r   aliasesrT   s   & r-   r   #_extract_pricing.<locals>.<genexpr>l  s1     cPbW33@@333@@@@Pbs   A
A
 A
TF)r   inputinput_cost_per_tokenprompt_token_cost)r   outputoutput_cost_per_tokencompletion_token_cost)r   request_cost)r   cached_promptinput_cache_readcache_read_cost_per_token)r   cache_creationinput_cache_writecache_write_cost_per_token)NrF   )r   r`   r+   r6   r   r   )
r   	alias_mapr   r   r   pricingtargetr   r   rT   s
   &        @r-   _extract_pricingr   b  s    R`.fkI &g.@GP*#c#hnn&-P
scPYP`P`PbcssscPYP`P`Pbccc"$(0OF J&:e+<J+N&0&7GO !  1
 7N / I Qs   %Dc                    V ^8  d   QhR\         \        \         \        \        3,          3,          R\        R\         \        \        3,          RR/# )r'   cachemodel_identryr)   Nr   )r,   s   "r-   r.   r.   y  sF     , ,d3S#X#67 ,3 ,tTWY\T\~ ,bf ,r/   c                 t    W V&   R V9   d-   VP                  R ^4      ^,          pV P                  W24       R# R# )rG   N)r4   
setdefault)r   r   r   
bare_models   &&& r-   _add_model_aliasesr   y  s8    (O
h^^C+A.
+ r/   c                t    V ^8  d   QhR\         R\        \        \        \        \        3,          3,          /# )r'   force_refreshr)   )rM   r   r+   r   )r,   s   "r-   r.   r.     s,     !+ !+ !+c4S>>Q9R !+r/   c                z   V '       g<   \         '       d0   \        P                  ! 4       \        ,
          \        8  d   \         #  \        P
                  ! \        ^
R7      pVP                  4        VP                  4       p/ pVP                  R. 4       F  pVP                  RR4      pRVP                  RR4      RVP                  R/ 4      P                  RR	4      R
VP                  R
V4      RVP                  R/ 4      /p\        W5V4       VP                  RR4      pV'       g   K  Wu8w  g   K  \        W7V4       K  	  Vs \        P                  ! 4       s\        P                  R\        V4      4       V#   \         d4   p\        P                  ! RT 24       \         ;'       g    / u Rp?# Rp?ii ; i)z9Fetch model metadata from OpenRouter (cached for 1 hour).r   r   idrF   context_lengthr>   max_completion_tokenstop_provideri   namer   canonical_slugz.Fetched metadata for %s models from OpenRouterz0Failed to fetch model metadata from OpenRouter: N)_model_metadata_cachetime_model_metadata_cache_time_MODEL_CACHE_TTLrequestsr   r	   raise_for_statusr   r   loggerdebugru   rm   loggingwarning)	r   responser   r   r(   r   r   	canonicales	   &        r-   fetch_model_metadatar    sd    22		F`8`dt7t$$+<< 5rB!!#}}XXfb)Eyyr*H %)),<f"E'>2)F)J)JKbdh)i		&(3599Y3	E u6		"2B7IyY2"5U; * !&%)YY["Es5zR +J1#NO$***+s,   C"E< ,E< 4AE< <F:(F5/F:5F:rF   c                    V ^8  d   QhR\         R\         R\        R\        \         \        \         \        3,          3,          /# )r'   rD   api_keyr   r)   )r+   rM   r   r   )r,   s   "r-   r.   r.     sH     X XXX X 
#tCH~
	Xr/   c                   \        V 4      pV'       d   \        V4      '       d   / # V'       gW   \        P                  V4      p\        P                  V^ 4      pVe(   \
        P
                  ! 4       V,
          \        8  d   V# V.pVP                  R4      '       d   VRR P                  R4      pM	VR,           pV'       d   Wv9  d   VP                  V4       V'       d   RRV 2/M/ pRp	V EFt  p
V
P                  R4      R,           p \        P                  ! W^
R7      pVP                  4        VP                  4       p/ pVP                  R. 4       F  p\        V\        4      '       g   K  VP                  R	4      pV'       g   K6  R
VP                  R
V4      /p\        V4      pVe   VVR&   \!        V4      pVe   VVR&   \#        V4      pV'       d   VVR&   \%        VVV4       K  	  \&        ;QJ d,    R VP                  R. 4       4       F  '       g   K   RM!	  RM! R VP                  R. 4       4       4      pV'       d    V
P                  R4      P)                  RR4      p\        P                  ! VR,           V^R7      pVP*                  '       g!   \        P                  ! VR,           V^R7      pVP*                  '       di   VP                  4       pVP                  R/ 4      pVP                  R4      pVP                  RR4      pV'       d   V'       d   VV9   d   VVV,          R&   V\        V&   \
        P
                  ! 4       \        V&   Vu # 	  V	'       d   \.        P1                  RW94       / \        V&   \
        P
                  ! 4       \        V&   / #   \,         d     L}i ; i  \,         d   pTp	 Rp?EK  Rp?ii ; i)zFetch model metadata from an OpenAI-compatible ``/models`` endpoint.

This is used for explicit custom endpoints where hardcoded global model-name
defaults are unreliable. Results are cached in memory per base URL.
Nr   rG   AuthorizationzBearer z/modelsheadersr   r   r   r   r   r   r   c              3   z   "   T F1  p\        V\        4      '       g   K  VP                  R 4      R8H  x  K3  	  R# 5i)owned_byr   N)r   r   r   )r   ms   & r-   r   0fetch_endpoint_model_metadata.<locals>.<genexpr>  s0      0AJq$4G 0j!Z/0s   ;;TFrF   r   r   r   n_ctxmodel_aliasz1Failed to fetch model metadata from %s/models: %sr   )rJ   rP   _endpoint_model_metadata_cacher   #_endpoint_model_metadata_cache_timer   _ENDPOINT_MODEL_CACHE_TTLr   rH   appendr   r   r   r   r   r   r   r   r   r   r   okrm   r   r   )rD   r  r   rT   cached	cached_at
candidates	alternater	  
last_error	candidaterw   r   r   r   r(   r   r   r   r   r   is_llamacppbase
props_resppropsgen_settingsr  r  excs   &&&                          r-   fetch_endpoint_model_metadatar!    sM    %X.J0<<	/33J?7;;JJ	499;#:>W"WMJ5!!sO**3/	&	Y0)$8?'' 34RG&*J	s#i/1	||C"EH%%'mmoG/1E VR0!%.. 99T?)/681L(M!8!?!-.<E*+(Fu(M%(45JE12*51'.E)$"5(E:! 1& #  VR0###  VR0 K $++C088CD!)d[.@'[\!]J%===%-\\$/7\]%^
!}}} * 1',yy1NPR'S , 0 0 9&+iir&B [[E5ICHE+./?@ :?*:6>Biik/
;Lc  j H*a13":.6:iik'
3I !   	J	sj   A?O A1O  !O $,O AN/%1N/AN/$N/,N/?&O /N=:O <N==O  OOOc                $    V ^8  d   QhR\         /# r'   r)   r   )r,   s   "r-   r.   r.     s     5 5 5r/   c                     \        \        P                  P                  R\         P                  ! 4       R,          4      4      p V R,          # )z8Return path to the persistent context length cache file.HERMES_HOMEz.hermeszcontext_length_cache.yaml)r   osenvironr   home)hermes_homes    r-   _get_context_cache_pathr*    s2    rzz~~mTYY[95LMNK444r/   c                F    V ^8  d   QhR\         \        \        3,          /# r#  )r   r+   rv   )r,   s   "r-   r.   r.     s      T#s(^ r/   c                 j   \        4       p V P                  4       '       g   / #  \        V 4      ;_uu_ 4       p\        P                  ! V4      ;'       g    / pRRR4       XP                  R/ 4      #   + '       g   i     L"; i  \         d#   p\        P                  RT4       / u Rp?# Rp?ii ; i)z:Load the model+provider -> context_length cache from disk.Ncontext_lengthsz'Failed to load context length cache: %s)	r*  existsopenyaml	safe_loadr   rm   r   r   )r^   fr   r  s       r-   _load_context_cacher3    s    "$D;;==	$ZZ1>>!$**D xx)2.. Z  >B	s4   B !A2B 2B	=B B2B-'B2-B2c                @    V ^8  d   QhR\         R\         R\        RR/# )r'   r(   rD   lengthr)   Nr+   rv   )r,   s   "r-   r.   r.     s.     C Cs Cc C3 C4 Cr/   c                   V  RV 2p\        4       pVP                  V4      V8X  d   R# W$V&   \        4       p VP                  P	                  RRR7       \        VR4      ;_uu_ 4       p\        P                  ! RV/VRR7       RRR4       \        P                  R	W2R
 4       R#   + '       g   i     L*; i  \         d"   p\        P                  RT4        Rp?R# Rp?ii ; i)zPersist a discovered context length for a model+provider combo.

Cache key is ``model@base_url`` so the same model name served from
different providers can have different limits.
@NT)parentsexist_okwr-  F)default_flow_stylez%Cached context length %s -> %s tokensr   z'Failed to save context length cache: %s)r3  r   r*  parentmkdirr/  r0  dumpr   inform   r   )r(   rD   r5  r   r   r^   r2  r  s   &&&     r-   save_context_lengthrA    s     G1XJ
C!Eyy~#J"$DC$6$__II(%0!N ;SAJP _  C>BBCs/   1B; *B( B; (B8	3B; ;C'C""C'c                R    V ^8  d   QhR\         R\         R\        \        ,          /# r'   r(   rD   r)   r+   r   rv   )r,   s   "r-   r.   r.   (  s%      S C HSM r/   c                F    V  RV 2p\        4       pVP                  V4      # )zBLook up a previously discovered context length for model+provider.r8  )r3  r   )r(   rD   r   r   s   &&  r-   get_cached_context_lengthrF  (  s(    G1XJ
C!E99S>r/   c                F    V ^8  d   QhR\         R\        \         ,          /# )r'   current_lengthr)   )rv   r   )r,   s   "r-   r.   r.   /  s        r/   c                4    \          F  pW8  g   K  Vu # 	  R# )z@Return the next lower probe tier, or None if already at minimum.N)CONTEXT_PROBE_TIERS)rH  tiers   & r-   get_next_probe_tierrL  /  s    # K $ r/   c                F    V ^8  d   QhR\         R\        \        ,          /# )r'   	error_msgr)   rD  )r,   s   "r-   r.   r.   7  s      c hsm r/   c                    V P                  4       p. ROpV FT  p\        P                  ! W14      pV'       g   K#  \        VP	                  ^4      4      pRTu;8:  d   R8:  g   KN   Vu #  KV  	  R# )a'  Try to extract the actual context limit from an API error message.

Many providers include the limit in their error text, e.g.:
  - "maximum context length is 32768 tokens"
  - "context_length_exceeded: 131072"
  - "Maximum context size 32768 exceeded"
  - "model's max context length is 65536"
   逖 N)zY(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})z:context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})z)(\d{4,})\s*(?:token)?\s*(?:context|limit)z">\s*(\d{4,})\s*(?:max|limit|token)z(\d{4,})\s*(?:max(?:imum)?)\b)r6   researchrv   group)rN  error_lowerpatternspatternr9   limits   &     r-   parse_context_limit_from_errorrY  7  sd     //#KH 		'/5A'Eu*
* +  r/   c                <    V ^8  d   QhR\         R\         R\        /# )r'   candidate_idlookup_modelr)   rL   )r,   s   "r-   r.   r.   S  s!      C s t r/   c                d    W8X  d   R# RV 9   d!   V P                  R^4      ^,          V8X  d   R# R# )a  Return True if *candidate_id* (from server) matches *lookup_model* (configured).

Supports two forms:
- Exact match:  "nvidia-nemotron-super-49b-v1" == "nvidia-nemotron-super-49b-v1"
- Slug match:   "nvidia/nvidia-nemotron-super-49b-v1" matches "nvidia-nemotron-super-49b-v1"
                (the part after the last "/" equals lookup_model)

This covers LM Studio's native API which stores models as "publisher/slug"
while users typically configure only the slug after the "local:" prefix.
TrG   F)rsplit)r[  r\  s   &&r-   _model_id_matchesr_  S  s4     #
l|223:1=Mr/   c                R    V ^8  d   QhR\         R\         R\        \        ,          /# rC  rD  )r,   s   "r-   r.   r.   f  s*     W Ws Wc Whsm Wr/   c           	     &
   ^ RI p\        V 4      p VP                  R4      pVP                  R4      '       d   VRR p \	        V4      p VP                  RR7      ;_uu_ 4       pVR8X  Ed(   VP                  V R2RV /R	7      pVP                  ^8X  d   VP                  4       pVP                  R
/ 4      pVP                  4        FA  w  rRV	9   g   K  \        V
\        \        34      '       g   K,  \        V
4      u uuRRR4       # 	  VP                  RR4      pRV9   do   VP                  R4       FY  pRV9   g   K  VP!                  4       P                  4       p\#        V4      ^8  g   K<   \        VR,          4      u uuRRR4       # 	  VR8X  Edz   VP                  V R24      pVP                  ^8X  EdT   VP                  4       pVP                  R. 4       EF,  p\'        VP                  RR4      V 4      '       g%   \'        VP                  RR4      V 4      '       g   KJ  VP                  R. 4       Fe  pVP                  R/ 4      pVP                  R4      pV'       g   K0  \        V\        \        34      '       g   KN  \        V4      u u uuRRR4       # 	  VP                  R4      ;'       g    VP                  R4      pV'       g   K  \        V\        \        34      '       g   EK  \        V4      u uuRRR4       # 	  VP                  V RV  24      pVP                  ^8X  d   VP                  4       pVP                  R4      ;'       g+    VP                  R4      ;'       g    VP                  R4      pV'       d2   \        V\        \        34      '       d   \        V4      uuRRR4       # VP                  V R24      pVP                  ^8X  d   VP                  4       pVP                  R. 4      pV F  p\'        VP                  RR4      V 4      '       g   K'  VP                  R4      ;'       g+    VP                  R4      ;'       g    VP                  R4      pV'       g   Kt  \        V\        \        34      '       g   K  \        V4      u uuRRR4       # 	  RRR4       R#   \
         d    Rp ELi ; i  \$         d     EK  i ; i  + '       g   i     R# ; i  \
         d     R# i ; i)z4Query a local server for the model's context length.NrG   r   g      @r   r   z	/api/showr   )r   
model_infor   
parametersrF   num_ctx
r   r   r   r   r   loaded_instancesconfigmax_context_lengthz/v1/models/max_model_len
max_tokensz
/v1/modelsr   r   )r   r=   rH   r   r   rm   r   postr   r   r   r`   r   rv   floatr4   r5   ru   rt   r_  )r(   rD   r   r   server_typer   respr   rb  r   r   paramslinery   r  instcfgctxmodels_lists   &&                 r-   _query_local_context_lengthrv  f  s3    #5)E %J5!!_
.x8C\\#\&&&h&{{j\#;65/{R##s*99;D!%,!;J&0&6&6&8
+s2z%#u7V7V#&u:- '& '9 "XXlB7F F*$*LL$6D(D0(,

(:(:(<#&u:?%-/259~(=' '& %7 k)zzZL"?@##s*99;D!XXh3,QUU5"-=uEEIZ[\[`[`aegi[jlqIrIr().@"(E&*hhx&<&)gg.>&?#&3:cC<+H+H+.s8OO '&F )F #$%%(<"="X"XGWAXC"sz#U|'D'D'*3xW '&@ 4 ::Kw?@D3&yy{hh/gg488<L3MggQUQYQYZfQg:cC<88s8i '&p ::J78D3&yy{"hhvr2$A(tR%@@eeO4ff>N8OffSTSXSXYeSf3:cC<#@#@#&s8OA '&x %y 'H Q  0 ,6 %-(,%-) '&H   s  S 
T $A-S-S-4S- 
T 3S-+S-4SS-
T BS-.=S-0S-S-
T 'S-S-S-<S-
T AS-$S-=S-&S-=
T A+S-8S-S-)S-S-#S-/
T :S-<T SSS*	%S-)S*	*S--S>	8T >T TTc                0    V ^8  d   QhR\         R\         /# r&   r*   )r,   s   "r-   r.   r.     s     # #C #C #r/   c                &    V P                  RR4      # )zNormalize version separators for matching.

Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
Normalize both to dashes for comparison.
rk   -)r   )r(   s   &r-   _normalize_model_versionrz    s     ==c""r/   c          	      ^    V ^8  d   QhR\         R\         R\         R\        \        ,          /# )r'   r(   rD   r  r)   rD  )r,   s   "r-   r.   r.     s/      3 #  PXY\P] r/   c                X   V'       d   VP                  R4      '       d   R#  VP                  R4      pVP                  R4      '       d   VRR pV R2pRVRR/p\        P                  ! WE^
R	7      pVP
                  ^8w  d   R# VP                  4       pVP	                  R
. 4       FO  pVP	                  R4      V 8X  g   K  VP	                  R4      p	\        V	\        4      '       g   KD  V	^ 8  g   KM  V	u # 	  R#   \         d"   p
\        P                  RT
4        Rp
?
R# Rp
?
ii ; i)zQuery Anthropic's /v1/models endpoint for context length.

Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
OAuth tokens (sk-ant-oat*) from Claude Code return 401.
z
sk-ant-oatNrG   r   z/v1/models?limit=1000z	x-api-keyzanthropic-versionz
2023-06-01r  r   r   max_input_tokensz%Anthropic /v1/models query failed: %sr   )r3   rH   r   r   r   r   r   r   rv   rm   r   r   )r(   rD   r  r  rw   r	  ro  r   r  rt  r  s   &&&        r-   _query_anthropic_context_lengthr~    s    g((66As#==9D+,
 ||C"=s"yy{&"%AuuT{e#ee./c3''C!GJ	 &   A<a@@As;   'C= 9C= 8C= $C= ,C= 5C= 9C= =D)D$$D)c                F    V ^8  d   QhR\         R\        \        ,          /# r&   rD  )r,   s   "r-   r.   r.     s        r/   c                b   \        4       pW9   d   W,          P                  R4      # \        V 4      P                  4       pVP	                  4        F{  w  r4RV9   d   VP                  R^4      ^,          MTpVP                  4       V P                  4       8X  g!   \        V4      P                  4       V8X  g   Kj  VP                  R4      u # 	  V P                  4       pVP	                  4        F  w  r4RV9   d   VP                  R^4      ^,          MTpVP                  4       V3\        V4      P                  4       V33 Fc  w  rxVP                  V4      '       g   K  \        V4      \        V4      8X  g   V\        V4      ,          R9   g   KP  VP                  R4      u u # 	  K  	  R# )u   Resolve Nous Portal model context length via OpenRouter metadata.

Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
with version normalization (dot↔dash).
r   rG   z-:.N)r  r   rz  r6   r`   r4   r3   ru   )	r(   metadatarT   or_idr   baremodel_lowerr  querys	   &        r-   _resolve_nous_context_lengthr    s[    $%H""#344)%0668J (),u{{3"1%5::<5;;=(,DT,J,P,P,RV`,`99-.. ) ++-K (),u{{3"1%5"&**,!<?WX\?]?c?c?egq>r sI##E**I#e*,	#e*0E0Nyy!122	 !t ) r/   c                n    V ^8  d   QhR\         R\         R\         R\        R,          R\         R\        /# )r'   r(   rD   r  config_context_lengthNrd   r)   r6  )r,   s   "r-   r.   r.   
  sP     $ $$$ $ :	$
 $ 	$r/   c                   Ve    \        V\        4      '       d
   V^ 8  d   V# \        V 4      p V'       d   \        W4      pVe   V# \	        V4      '       Ed2   \        V4      '       Eg    \        WR7      pVP                  V 4      pV'       g]   \        V4      ^8X  d$   \        \        VP                  4       4      4      pM)VP                  4        F  w  rW9   g	   W9   g   K  T	p M	  V'       d*   VP                  R4      p
\        V
\        4      '       d   V
# \        V4      '       g]   \        V4      '       d)   \        W4      pV'       d   V^ 8  d   \        WV4       V# \         P#                  RW\$        R 4       \$        # VR8X  g   V'       d(   RV9   d!   \'        Y;'       g    RV4      pV'       d   V# TpV'       d   VR9   d   V'       d   \)        V4      pV'       d   TpVR8X  d   \+        V 4      pV'       d   V# V'       d   ^ R	IHp V! W4      pV'       d   V# \1        4       pV V9   d   VV ,          P                  RR
4      # V P3                  4       p\5        \6        P                  4       R RR7       F  w  ppVV9   g   K  Vu # 	  V'       d:   \        V4      '       d)   \        W4      pV'       d   V^ 8  d   \        WV4       V# \$        # )a3  Get the context length for a model.

Resolution order:
0. Explicit config override (model.context_length or custom_providers per-model)
1. Persistent cache (previously discovered via probing)
2. Active endpoint metadata (/models for explicit custom endpoints)
3. Local server query (for local endpoints)
4. Anthropic /v1/models API (API-key users only, not OAuth)
5. OpenRouter live API metadata
6. Nous suffix-match via OpenRouter cache
7. models.dev registry lookup (provider-aware)
8. Thin hardcoded defaults (broad family patterns)
9. Default fallback (128K)
)r  r   u   Could not detect context length for model %r at %s — defaulting to %s tokens (probe-down). Set model.context_length in config.yaml to override.r   r   rW   zhttps://api.anthropic.comr   )lookup_models_dev_contextr>   c                 &    \        V ^ ,          4      # )    ru   )xs   &r-   <lambda>*get_model_context_length.<locals>.<lambda>|  s    s1Q4yr/   T)r   reverse)r
   r   )r   rv   r=   rF  rU   rh   r!  r   ru   nextiterr   r`   r|   rv  rA  r   r@  DEFAULT_FALLBACK_CONTEXTr~  re   r  agent.models_devr  r  r6   sortedDEFAULT_CONTEXT_LENGTHS)r(   rD   r  r  rd   r  endpoint_metadatamatchedr   r   r   	local_ctxrt  effective_providerinferredr  r  r  default_modelr5  s   &&&&&               r-   get_model_context_lengthr  
  s   , (Z8Ms-S-SXmpqXq$$
 #5)E *5;M 8$$-H-R-R9(T#''.$%*t$5$<$<$>?@ #4"9"9";JC|s|"' #< $[[)9:N.#..%%*844 **7H	Q'C$$KK. $<Q#?	 ,+ ;(H4-e5\5\A\^efJ "!37O!O/9H%-"V#*51J>'(:BJ $%H""#3V<< ++-K!'%%'-@$"v K'M	" %h///@	Q; $#r/   c                0    V ^8  d   QhR\         R\        /# )r'   r   r)   r6  )r,   s   "r-   r.   r.     s        r/   c                :    V '       g   ^ # \        V 4      ^,          # )z<Rough token estimate (~4 chars/token) for pre-flight checks.r  )r   s   &r-   estimate_tokens_roughr    s    t9>r/   c                h    V ^8  d   QhR\         \        \        \        3,          ,          R\        /# )r'   messagesr)   )r   r   r+   r   rv   )r,   s   "r-   r.   r.     s'      T$sCx.-A c r/   c                8    \        R V  4       4      pV^,          # )z:Rough token estimate for a message list (pre-flight only).c              3   J   "   T F  p\        \        V4      4      x  K  	  R # 5irS   ru   r+   r   msgs   & r-   r   1estimate_messages_tokens_rough.<locals>.<genexpr>  s     8xc#c(mmx   !#)sum)r  total_charss   & r-   estimate_messages_tokens_roughr    s    8x88K!r/   system_prompttoolsc          
          V ^8  d   QhR\         \        \        \        3,          ,          R\        R\        \         \        \        \        3,          ,          ,          R\
        /# )r'   r  r  r  r)   )r   r   r+   r   r   rv   )r,   s   "r-   r.   r.     sQ      4S>"  Dc3h()	
 	r/   c                   ^ pV'       d   V\        V4      ,          pV '       d   V\        R V  4       4      ,          pV'       d   V\        \        V4      4      ,          pV^,          # )u(  Rough token estimate for a full chat-completions request.

Includes the major payload buckets Hermes sends to providers:
system prompt, conversation messages, and tool schemas.  With 50+
tools enabled, schemas alone can add 20-30K tokens — a significant
blind spot when only counting messages.
c              3   J   "   T F  p\        \        V4      4      x  K  	  R # 5irS   r  r  s   & r-   r   0estimate_request_tokens_rough.<locals>.<genexpr>  s     =HS3s3x==Hr  )ru   r  r+   )r  r  r  r  s   &$$ r-   estimate_request_tokens_roughr    sS     Ks=))s=H===s3u:&!r/   c                   V ^8  d   Qh/ ^ \         9   d   \        \        ,          ;R&   ^\         9   d,   \        \        \        \        \        3,          3,          ;R&   ^\         9   d
   \
        ;R&   ^\         9   d=   \        \        \        \        \        \        \        3,          3,          3,          ;R&   ^\         9   d   \        \        \
        3,          ;R&   ^\         9   d   \        \        \        3,          ;R&   # )r'   r7   r   r   r  r  r_   )__conditional_annotations__	frozensetr+   r   r   rm  )r,   s   "r-   r.   r.      s      0
 
IcN 
1| 6 5tCc3h/0 5}~ & %E %B J IS$sDcN/B*C%C D ICD ; :T#u*%5 :EX $sCx. Yr/   )r>   i   i }  i>  i@  )
r   context_windowrh  max_position_embeddingsri  r}  max_sequence_lengthmax_seq_lenn_ctx_trainr  )r   max_output_tokensrj  )	localhostz	127.0.0.1z::1z0.0.0.0)rP  rQ  )F)rF   F)rF   rF   NrF   )Ir  __doc__r   r&  rR  r   pathlibr   typingr   r   r   r   urllib.parser   r   r0  hermes_constantsr	   	getLogger__name__r   r  r7   compile
IGNORECASEr8   r=   r   r   r   r  r  r  rJ  r  r  r   r   rn   rJ   rP   rU   r_   re   rh   r|   r   r   r   r   r   r   r   r   r  r!  r*  r3  rA  rF  rL  rY  r_  rv  rz  r~  r  r  r  r  r  r.   )r  s   @r-   <module>r     s    	 	   , , !   2			8	$
 &/ 
0
0
0(
0*3
05B
0
0
0"
0$-
0/;
0=H
0JT
0 
0 "
0 $0
0 2<
0 >G
0 	
0 	
0 
0 
0  2
0 

0 
0 
0 #
0 %-
0 /?
0 
0 
0 (
0 *2
0 4?
0 
0 
0 
0 &
0 (.
0 0;
0 =E
0 GM
0 
&  
 jjLMM & 46  5$%  % GI  I8: # : 
  /q1 /
 w/ / w/ / f/ w/ V/ V/  g!/$ 6%/& 6'/( v)/* T+/. //2 V3/6 F7/: v;/> 
6
FvfFF!6f7GV]/ b   <0DH
$h$8$ $ 	$
 }$ M$ 9$ i$ "9$ \$ ($ %f$ 
$ Y$ 	$  !$  ($:!H8v0	==.,!+HXv5C*8&Wt#>B$D  -1	 r/   