
    Ji                        U d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlZddlZddlmZ  ej        e          Z eh d          Zee         ed<    ej        d	ej                  Zd
edefdZi ae	ee	eef         f         ed<   daeed<   dZ i Z!e	ee	ee	eef         f         f         ed<   i Z"e	eef         ed<   dZ#g dZ$e$d         Z%i ddddddddddddddddd d!d"dd#d$d%d$d&d'd(d)d*d+d,d$d-d$d.d+d+d'd/d)d0Z&d1Z'd2Z(d3Z)d4edefd5Z*d4ede+fd6Z,d4ede+fd7Z-i d8d9d:d9d;d<d=d>d?d@dAd@dBd&dCdDdEdDdFdGdHdIdJdKdLd"dMdNdOdNdPdQZ.e	eef         edR<   d4edee         fdSZ/d4ede+fdTZ0d4ede+fdUZ1d4edee         fdVZ2dWefdXZ3ddWed[e4d\e4dee4         fd]Z5d^e	eef         d_e6ed`f         dee4         fdaZ7d^e	eef         dee4         fdbZ8d^e	eef         dee4         fdcZ9d^e	eef         de	eef         fddZ:dee	ee	eef         f         dfedge	eef         ddfdhZ;ddje+de	ee	eef         f         fdkZ<	 	 dd4edmedje+de	ee	eef         f         fdnZ=defdoZ>de	ee4f         fdpZ?d
ed4edqe4ddfdrZ@d
ed4edee4         fdsZAdte4dee4         fduZBdvedee4         fdwZCdxedyede+fdzZDd
ed4edee4         fd{ZEd
edefd|ZFd
ed4edmedee4         fd}ZGd
edee4         fd~ZH	 	 	 	 dd
ed4edmede4dz  dede4fdZIdede4fdZJde
e	eef                  de4fdZKdlddde
e	eef                  dedee
e	eef                           de4fdZLdS )zModel metadata, context lengths, and token estimation utilities.

Pure utility functions with no AIAgent dependency. Used by ContextCompressor
and run_agent.py for pre-flight context checks.
    N)Path)AnyDictListOptional)urlparse)OPENROUTER_MODELS_URL>%   z-aiz.ai	deep-seek
ai-gateway
minimax-cncopilot-acpopencode-goopenai-codexopencode-zengithub-modelsgithub-copilotgoglmzaizenkilokiminousqwenlocalzhipualiyunclaudecustomgithubvercelalibabacopilotminimaxdeepseekkilocodemoonshotopencode	anthropic	dashscope
openrouterkimi-coding_PROVIDER_PREFIXESzE^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)modelreturnc                 ,   d| vs|                      d          r| S |                     dd          \  }}|                                                                }|t          v r0t
                              |                                          r| S |S | S )ua  Strip a recognised provider prefix from a model string.

    ``"local:my-model"`` → ``"my-model"``
    ``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
    ``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
    :http   )
startswithsplitstriplowerr/   _OLLAMA_TAG_PATTERNmatch)r0   prefixsuffixprefix_lowers       1/home/ubuntu/hermes-agent/agent/model_metadata.py_strip_provider_prefixr@   +   s     %5++F33[[a((NFF<<>>''))L)))$$V\\^^44 	LL    _model_metadata_cache_model_metadata_cache_timei  _endpoint_model_metadata_cache#_endpoint_model_metadata_cache_timei,  )  i   i }  i>  i@  zclaude-opus-4-6i@B zclaude-sonnet-4-6zclaude-opus-4.6zclaude-sonnet-4.6r    i@ zgpt-4.1i zgpt-5rF   zgpt-4geminii   r'   llamai   r   r&   i   r   i  r   i   zQwen/Qwen3.5-397B-A17BzQwen/Qwen3.5-35B-A3Bi   i   )zdeepseek-ai/DeepSeek-V3.2zmoonshotai/Kimi-K2.5zmoonshotai/Kimi-K2-ThinkingzMiniMaxAI/MiniMax-M2.5zXiaomiMiMo/MiMo-V2-Flashzzai-org/GLM-5)
context_lengthcontext_windowmax_context_lengthmax_position_embeddingsmax_model_lenmax_input_tokensmax_sequence_lengthmax_seq_lenn_ctx_trainn_ctx)max_completion_tokensmax_output_tokens
max_tokens)	localhostz	127.0.0.1z::1z0.0.0.0base_urlc                 T    | pd                                                     d          S )N /)r8   rstriprW   s    r?   _normalize_base_urlr]      s&    N!!##**3///rA   c                 H    dt          |                                           v S )Nopenrouter.ai)r]   r9   r\   s    r?   _is_openrouter_base_urlr`      s"    1(;;AACCCCrA   c                 ^    t          |           }t          |          ot          |           S N)r]   boolr`   )rW   
normalizeds     r?   _is_custom_endpointre      s.    $X..J
G$;J$G$G GGrA   zapi.openai.comopenaizchatgpt.comapi.anthropic.comr+   zapi.z.air   zapi.moonshot.air.   zapi.kimi.comzapi.minimaxzdashscope.aliyuncs.comr$   zdashscope-intl.aliyuncs.comr_   r-   z!generativelanguage.googleapis.comgooglezinference-api.nousresearch.comr   zapi.deepseek.comzapi.githubcopilot.comr%   zmodels.github.aizapi.fireworks.ai	fireworks_URL_TO_PROVIDERc                    t          |           }|sdS t          d|v r|nd|           }|j                                        p|j                                        }t
                                          D ]\  }}||v r|c S dS )a  Infer the models.dev provider name from a base URL.

    This allows context length resolution via models.dev for custom endpoints
    like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
    explicitly set the provider name in config.
    N://zhttps://)r]   r   netlocr9   pathrj   items)rW   rd   parsedhosturl_partproviders         r?   _infer_provider_from_urlrt      s     %X..J tEZ$7$7jj=T
=T=TUUF=  7FK$5$5$7$7D.4466  (tOOO 4rA   c                 $    t          |           d uS rb   )rt   r\   s    r?   _is_known_provider_base_urlrv      s    #H--T99rA   c                 P   t          |           }|sdS d|v r|nd| }	 t          |          }|j        pd}n# t          $ r Y dS w xY w|t          v rdS ddl}	 |                    |          }|j        p|j        p|j	        S # t          $ r Y nw xY w|                    d          }t          |          d	k    rj	 t          |d                   t          |d
                   }	}|dk    rdS |dk    rd|	cxk    rdk    rn ndS |dk    r|	dk    rdS n# t          $ r Y nw xY wdS )zOReturn True if base_url points to a local machine (localhost / RFC-1918 / WSL).Frl   zhttp://rY   Tr   N.   r5   
                  )r]   r   hostname	Exception_LOCAL_HOSTS	ipaddress
ip_address
is_privateis_loopbackis_link_local
ValueErrorr7   lenint)
rW   rd   urlrp   rq   r   addrpartsfirstseconds
             r?   is_local_endpointr      s   $X..J u++**1G:1G1GC#$"   uu|t##D))H$"2Hd6HH    JJsOOE
5zzQ		aMM3uQx==6E{{t||f 2 2 2 2 2 2 2 2 2t||#t 	 	 	D	5s?   9 
AA)B 
BB=0D /D D 
D#"D#c                    ddl }t          |           }|}|                    d          r
|dd         }	 |                    d          5 }	 |                    | d          }|j        dk    r	 ddd           d	S n# t          $ r Y nw xY w	 |                    | d
          }|j        dk    r8	 |                                }d|v r	 ddd           dS n# t          $ r Y nw xY wn# t          $ r Y nw xY w	 |                    | d          }|j        dk    r|                    | d          }|j        dk    rd|j        v r	 ddd           dS n# t          $ r Y nw xY w	 |                    | d          }|j        dk    r&|                                }d|v r	 ddd           dS n# t          $ r Y nw xY wddd           n# 1 swxY w Y   n# t          $ r Y nw xY wdS )zDetect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
    r   N/v1g       @timeout/api/v1/models   	lm-studioz	/api/tagsmodelsollama	/v1/props/propsdefault_generation_settingsllamacppz/versionversionvllm)	httpxr]   endswithClientgetstatus_coder   jsontext)rW   r   rd   
server_urlclientrdatas          r?   detect_local_server_typer      sQ   
 LLL$X..JJ5!! %_
*\\#\&& '	&JJ*<<<===C''&'	 '	 '	 '	 '	 '	 '	 '	 (   

JJ*77788=C'' vvxx#t++#+#'	 '	 '	 '	 '	 '	 '	 '	  ,$      JJ*77788=C''

j#8#8#899A=C'',IQV,S,S%9'	 '	 '	 '	 '	 '	 '	 '	:    JJ*66677=C''6688D D((%K'	 '	 '	 '	 '	 '	 '	 '	L    M'	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	 '	P     4s  G G#B 2G ?G 
B
GBG#C-5CG C-
C)&C-(C))C-,G-
C:7G9C::G>AEG G
E)&G(E))G-;F7)G 6G7
GGGGG GG GG 
G,+G,valuec              #      K   t          | t                    r2| V  |                                 D ]}t          |          E d {V  d S t          | t                    r| D ]}t          |          E d {V  d S d S rb   )
isinstancedictvalues_iter_nested_dictslist)r   nesteditems      r?   r   r   ,  s      % 0llnn 	2 	2F)&1111111111	2 	2	E4	 	  0 	0 	0D)$//////////0 0	0 	0rA      逖 minimummaximumc                 $   	 t          | t                    rd S t          | t                    r(|                                                     dd          } t          |           }n# t          t          f$ r Y d S w xY w||cxk    r|k    rn n|S d S )N,rY   )r   rc   strr8   replacer   	TypeErrorr   )r   r   r   results       r?   _coerce_reasonable_intr   6  s    eT"" 	4eS!! 	3KKMM))#r22EUz"   tt&####G#####4s   A& AA& &A;:A;payloadkeys.c                     d |D             }t          |           D ]W}|                                D ]@\  }}t          |                                          |vr)t	          |          }||c c S AXd S )Nc                 6    h | ]}|                                 S  )r9   ).0keys     r?   	<setcomp>z%_extract_first_int.<locals>.<setcomp>E  s     ***cciikk***rA   )r   ro   r   r9   r   )r   r   keysetmappingr   r   coerceds          r?   _extract_first_intr   D  s    **T***F%g..  !--// 	 	JC3xx~~v--,U33G" #		 4rA   c                 ,    t          | t                    S rb   )r   _CONTEXT_LENGTH_KEYSr   s    r?   _extract_context_lengthr   P      g';<<<rA   c                 ,    t          | t                    S rb   )r   _MAX_COMPLETION_KEYSr   s    r?   _extract_max_completion_tokensr   T  r   rA   c                 X   dddddd}t          |           D ]}d |                                D             t          fd|                                D                       sNi }|                                D ]%\  }}|D ]}|v r|         d	vr|         ||<    n&|r|c S i S )
N)promptinputinput_cost_per_tokenprompt_token_cost)
completionoutputoutput_cost_per_tokencompletion_token_cost)requestrequest_cost)
cache_readcached_promptinput_cache_readcache_read_cost_per_token)cache_writecache_creationinput_cache_writecache_write_cost_per_token)r   r   r   r   r   c                 X    i | ]'\  }}t          |                                          |(S r   )r   r9   )r   r   r   s      r?   
<dictcomp>z$_extract_pricing.<locals>.<dictcomp>a  s.    PPP*#uc#hhnn&&PPPrA   c              3   N   K   | ]}t          fd |D                       V   dS )c              3       K   | ]}|v V  	d S rb   r   )r   aliasrd   s     r?   	<genexpr>z-_extract_pricing.<locals>.<genexpr>.<genexpr>b  s(      @@uu
*@@@@@@rA   N)any)r   aliasesrd   s     r?   r   z#_extract_pricing.<locals>.<genexpr>b  s@      ccW3@@@@@@@@@ccccccrA   )NrY   )r   ro   r   r   )r   	alias_mapr   pricingtargetr   r   rd   s          @r?   _extract_pricingr   X  s   R`.fk I &g..  PPPPP
ccccPYP`P`PbPbccccc 	"$(00 	 	OFG   J&&:e+<J+N+N&0&7GFOE 	NNN	IrA   cachemodel_identryc                     || |<   d|v r4|                     dd          d         }|                     ||           d S d S )NrZ   r5   )r7   
setdefault)r   r   r   
bare_models       r?   _add_model_aliasesr   o  sO    E(O
h^^C++A.
U+++++ rA   Fforce_refreshc                    | s2t           r+t          j                    t          z
  t          k     rt           S 	 t	          j        t          d          }|                                 |                                }i }|                    dg           D ]}|                    dd          }|                    dd          |                    di                               d	d
          |                    d|          |                    di           d}t          |||           |                    dd          }|r||k    rt          |||           |a t          j                    at                              dt          |                     |S # t          $ r*}t          j        d|            t           pi cY d}~S d}~ww xY w)z9Fetch model metadata from OpenRouter (cached for 1 hour).rz   r   r   idrY   rI   rF   top_providerrS   i   namer   )rI   rS   r   r   canonical_slugz.Fetched metadata for %s models from OpenRouterz0Failed to fetch model metadata from OpenRouter: N)rB   timerC   _MODEL_CACHE_TTLrequestsr   r	   raise_for_statusr   r   loggerdebugr   r   loggingwarning)	r   responser   r   r0   r   r   	canonicales	            r?   fetch_model_metadatar  v  s     %2 %	F`8`dt7t7t$$+< 5rBBB!!###}}XXfb)) 	< 	<Eyyr**H"')),<f"E"E).>2)F)F)J)JKbdh)i)i		&(33 99Y33	 E uh666		"2B77I <Y(22"5)U;;; %%)Y[["Es5zzRRR + + +N1NNOOO$*******+s   EF 
GG=GGrY   api_keyc                    t          |           }|rt          |          ri S |sXt                              |          }t                              |d          }|!t          j                    |z
  t          k     r|S |g}|                    d          r|dd                             d          }n|dz   }|r||vr|	                    |           |rdd| ini }d}	|D ]d}
|
                    d          dz   }	 t          j        ||d	
          }|                                 |                                }i }|                    dg           D ]}t          |t                    s|                    d          }|s0d|                    d|          i}t          |          }|||d<   t!          |          }|||d<   t#          |          }|r||d<   t%          |||           t'          d |                    dg           D                       }|r	 |
                    d                              dd          }t          j        |dz   |d
          }|j        st          j        |dz   |d
          }|j        rh|                                }|                    di           }|                    d          }|                    dd          }|r|r||v r|||         d<   n# t,          $ r Y nw xY w|t          |<   t          j                    t          |<   |c S # t,          $ r}|}	Y d}~^d}~ww xY w|	rt.                              d||	           i t          |<   t          j                    t          |<   i S )zFetch model metadata from an OpenAI-compatible ``/models`` endpoint.

    This is used for explicit custom endpoints where hardcoded global model-name
    defaults are unreliable. Results are cached in memory per base URL.
    r   Nr   r   rZ   AuthorizationzBearer z/modelsrz   headersr   r   r   r   rI   rS   r   c              3   r   K   | ]2}t          |t                    |                    d           dk    V  3dS )owned_byr   N)r   r   r   )r   ms     r?   r   z0fetch_endpoint_model_metadata.<locals>.<genexpr>  sV        Jq$4G4Gj!!Z/     rA   rY   r      r   r   rR   model_aliasz1Failed to fetch model metadata from %s/models: %s)r]   r`   rD   r   rE   r  _ENDPOINT_MODEL_CACHE_TTLr   r[   appendr  r  r   r   r   r   r   r   r   r   r   okr   r  r  )rW   r  r   rd   cached	cached_at
candidates	alternater  
last_error	candidater   r	  r   r   r0   r   r   rI   rS   r   is_llamacppbase
props_resppropsgen_settingsrR   r  excs                                r?   fetch_endpoint_model_metadatar&    s/    %X..J 0<< 	 /33J??7;;JJJ	49;;#:>W"W"WMJ5!! 'ssO**3//		&	 %Yj00)$$$8?G 3' 3 344RG&*J 3 3	s##i/1	|C"EEEH%%'''mmooG/1E VR00 ; ;!%..  99T?? )/681L1L(M!8!?!?!-.<E*+(Fu(M(M%(45JE12*511 /'.E)$"5(E::::    VR00    K  $++C0088CCD!)d[.@'[\!]!]!]J%= _%-\$/7\]%^%^%^
!} I * 1 1',yy1NPR'S'S , 0 0 9 9&+iir&B&B  I[ I[E5I5ICHE+./?@    D :?*:6>Bikk/
;LLL 	 	 	JJJJJJ	  bH*V`aaa13":.6:ikk'
3Is>   >D"L.!CK54L.5
L?L.L)L..
M8M  Mc                      t          t          j                            dt          j                    dz                      } | dz  S )z8Return path to the persistent context length cache file.HERMES_HOMEz.hermeszcontext_length_cache.yaml)r   osenvironr   home)hermes_homes    r?   _get_context_cache_pathr-    s6    rz~~mTY[[95LMMNNK444rA   c                  Z   t                      } |                                 si S 	 t          |           5 }t          j        |          pi }ddd           n# 1 swxY w Y   |                    di           S # t          $ r'}t                              d|           i cY d}~S d}~ww xY w)z:Load the model+provider -> context_length cache from disk.Ncontext_lengthsz'Failed to load context length cache: %s)	r-  existsopenyaml	safe_loadr   r   r  r  )rn   fr   r  s       r?   _load_context_cacher5    s    "$$D;;== 	$ZZ 	+1>!$$*D	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+ 	+xx)2...   >BBB						s@   A9 AA9 AA9 A A9 9
B*B%B*%B*lengthc                    |  d| }t                      }|                    |          |k    rdS |||<   t                      }	 |j                            dd           t          |d          5 }t          j        d|i|d           ddd           n# 1 swxY w Y   t          	                    d	||d
           dS # t          $ r&}t                              d|           Y d}~dS d}~ww xY w)zPersist a discovered context length for a model+provider combo.

    Cache key is ``model@base_url`` so the same model name served from
    different providers can have different limits.
    @NT)parentsexist_okwr/  F)default_flow_stylez%Cached context length %s -> %s tokensr   z'Failed to save context length cache: %s)r5  r   r-  parentmkdirr1  r2  dumpr  infor   r  )r0   rW   r6  r   r   rn   r4  r  s           r?   save_context_lengthrA  	  si    

X

C!!Eyy~~E#J"$$DC$666$__ 	OI(%0!NNNN	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O 	O;SV--PPPPP C C C>BBBBBBBBBCs<   ,C 1BC BC B!C 
C2C--C2c                 V    |  d| }t                      }|                    |          S )zBLook up a previously discovered context length for model+provider.r8  )r5  r   )r0   rW   r   r   s       r?   get_cached_context_lengthrC    s0    

X

C!!E99S>>rA   current_lengthc                 .    t           D ]}|| k     r|c S dS )z@Return the next lower probe tier, or None if already at minimum.N)CONTEXT_PROBE_TIERS)rD  tiers     r?   get_next_probe_tierrH  %  s/    #  .  KKK !4rA   	error_msgc                     |                                  }g d}|D ]O}t          j        ||          }|r6t          |                    d                    }d|cxk    rdk    rn K|c S PdS )a?  Try to extract the actual context limit from an API error message.

    Many providers include the limit in their error text, e.g.:
      - "maximum context length is 32768 tokens"
      - "context_length_exceeded: 131072"
      - "Maximum context size 32768 exceeded"
      - "model's max context length is 65536"
    )zY(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})z:context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})z)(\d{4,})\s*(?:token)?\s*(?:context|limit)z">\s*(\d{4,})\s*(?:max|limit|token)z(\d{4,})\s*(?:max(?:imum)?)\br5   r   r   N)r9   researchr   group)rI  error_lowerpatternspatternr;   limits         r?   parse_context_limit_from_errorrR  -  s     //##K  H   	';// 	A''Eu****
*****4rA   candidate_idlookup_modelc                 b    | |k    rdS d| v r"|                      dd          d         |k    rdS dS )a  Return True if *candidate_id* (from server) matches *lookup_model* (configured).

    Supports two forms:
    - Exact match:  "nvidia-nemotron-super-49b-v1" == "nvidia-nemotron-super-49b-v1"
    - Slug match:   "nvidia/nvidia-nemotron-super-49b-v1" matches "nvidia-nemotron-super-49b-v1"
                    (the part after the last "/" equals lookup_model)

    This covers LM Studio's native API which stores models as "publisher/slug"
    while users typically configure only the slug after the "local:" prefix.
    TrZ   r5   F)rsplit)rS  rT  s     r?   _model_id_matchesrW  I  sH     |##t
l|223::1=MMt5rA   c                 R
   ddl }t          |           } |                    d          }|                    d          r
|dd         }	 t	          |          }n# t
          $ r d}Y nw xY w	 |                    d          5 }|dk    rL|                    | d	d
| i          }|j        dk    r$|	                                }|
                    di           }|                                D ]B\  }	}
d|	v r9t          |
t          t          f          rt          |
          c cddd           S C|
                    dd          }d|v r|                    d          D ]s}d|v rm|                                                                }t#          |          dk    r4	 t          |d                   c cddd           S # t$          $ r Y ow xY wt|dk    r|
                    | d          }|j        dk    r]|	                                }|
                    dg           D ]1}t'          |
                    dd          |           s$t'          |
                    dd          |           r|
                    dg           D ]j}|
                    di           }|
                    d          }|r;t          |t          t          f          rt          |          c c cddd           S k|
                    d          p|
                    d          }|r9t          |t          t          f          rt          |          c cddd           S 3|
                    | d|            }|j        dk    r|	                                }|
                    d          p)|
                    d          p|
                    d          }|r7t          |t          t          f          rt          |          cddd           S |
                    | d           }|j        dk    r|	                                }|
                    d!g           }|D ]}t'          |
                    dd          |           rz|
                    d          p)|
                    d          p|
                    d          }|r9t          |t          t          f          rt          |          c cddd           S ddd           n# 1 swxY w Y   n# t
          $ r Y nw xY wdS )"z4Query a local server for the model's context length.r   NrZ   r   r   g      @r   r   z	/api/showr   )r   r   
model_inforI   
parametersrY   num_ctx
   r   r   r   r   r   loaded_instancesconfigrK   z/v1/models/rM   rU   z
/v1/modelsr   )r   r@   r[   r   r   r   r   postr   r   r   ro   r   r   floatr7   r8   r   r   rW  )r0   rW   r   r   server_typer   respr   rY  r   r   paramsliner   r  instcfgctxmodels_lists                      r?   _query_local_context_lengthrk  \  s   LLL #5))E %%J5!! %_
.x88   C\\#\&& @	,&h&&{{j#;#;#;65/{RR#s**99;;D!%,!;!;J&0&6&6&8&8 . .
U+s22z%#u7V7V2#&u::--@	, @	, @	, @	, @	, @	, @	, @	, "XXlB77F F**$*LL$6$6 - -D(D00(,

(:(:(<(<#&u::??%-/259~~(=(='@	, @	, @	, @	, @	, @	, @	, @	,( ,6 %- %- %-(,%- k))zzZ"?"?"?@@#s**99;;D!XXh33 0 0,QUU5"-=-=uEE 
0IZ[\[`[`aegi[j[jlqIrIr 
0().@"(E(E 4 4&*hhx&<&<&)gg.>&?&?#& !4:cC<+H+H !4+.s88OOOOO@	, @	, @	, @	, @	, @	, @	, @	,R #$%%(<"="="XGWAXAXC" 0z#U|'D'D 0'*3xxW@	, @	, @	, @	, @	, @	, @	, @	,\ ::????@@D3&&yy{{hh//g488<L3M3MgQUQYQYZfQgQg $:cC<88 $s88i@	, @	, @	, @	, @	, @	, @	, @	,p ::77788D3&&yy{{"hhvr22$ , ,A(tR%@@ ,eeO44f>N8O8OfSTSXSXYeSfSf ,:cC<#@#@ ,#&s88OOA@	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	, @	,B     4s   	A A('A(,T B#T%T 2A/T"G6T8T 
GTGDT)T 6ATT B&TT C"T1T >T?T TT TT 
T$#T$c                 .    |                      dd          S )zNormalize version separators for matching.

    Nous uses dashes: claude-opus-4-6, claude-sonnet-4-5
    OpenRouter uses dots: claude-opus-4.6, claude-sonnet-4.5
    Normalize both to dashes for comparison.
    rx   -)r   )r0   s    r?   _normalize_model_versionrn    s     ==c"""rA   c                 X   |r|                     d          rdS 	 |                    d          }|                    d          r
|dd         }| d}|dd}t          j        ||d	
          }|j        dk    rdS |                                }|                    dg           D ]O}|                    d          | k    r4|                    d          }	t          |	t                    r
|	dk    r|	c S Pn2# t          $ r%}
t                              d|
           Y d}
~
nd}
~
ww xY wdS )zQuery Anthropic's /v1/models endpoint for context length.

    Only works with regular ANTHROPIC_API_KEY (sk-ant-api*).
    OAuth tokens (sk-ant-oat*) from Claude Code return 401.
    z
sk-ant-oatNrZ   r   r   z/v1/models?limit=1000z
2023-06-01)z	x-api-keyzanthropic-versionrz   r  r   r   r   rN   r   z%Anthropic /v1/models query failed: %s)r6   r[   r   r  r   r   r   r   r   r   r  r  )r0   rW   r  r!  r   r  rd  r   r  ri  r  s              r?   _query_anthropic_context_lengthrp    sq     g((66 tAs##== 	9D,,, !-
 
 |C"===s""4yy{{&"%% 	 	AuuT{{e##ee.//c3'' C!GGJJJ		
  A A A<a@@@@@@@@A4s%   A C8 =A8C8 6C8 8
D'D""D'c                    t                      }| |v r||                              d          S t          |                                           }|                                D ]\  }}d|v r|                    dd          d         n|}|                                |                                 k    s%t          |                                          |k    r|                    d          c S |                                 }|                                D ]\  }}d|v r|                    dd          d         n|}|                                |ft          |                                          |ffD ]j\  }}|                    |          rPt          |          t          |          k    s|t          |                   dv r|                    d          c c S kdS )u  Resolve Nous Portal model context length via OpenRouter metadata.

    Nous model IDs are bare (e.g. 'claude-opus-4-6') while OpenRouter uses
    prefixed IDs (e.g. 'anthropic/claude-opus-4.6'). Try suffix matching
    with version normalization (dot↔dash).
    rI   rZ   r5   z-:.N)r  r   rn  r9   ro   r7   r6   r   )	r0   metadatard   or_idr   baremodel_lowerr  querys	            r?   _resolve_nous_context_lengthrw    s    $%%H""#3444)%006688J (( / /u),u{{3""1%%5::<<5;;==((,DT,J,J,P,P,R,RV`,`,`99-..... -a
 ++--K (( 3 3u),u{{3""1%%5"&**,,!<?WX\?]?]?c?c?e?egq>r s 	3 	3Iu##E** 3I#e**,,	#e**0E0N0Nyy!12222222		3 4rA   config_context_lengthrs   c                    |t          |t                    r|dk    r|S t          |           } |rt          | |          }||S t	          |          rBt          |          s2t          ||          }|                    |           }|sht          |          dk    r/t          t          |                                                    }n&|                                D ]\  }}	| |v s|| v r|	} n|r,|                    d          }
t          |
t                    r|
S t          |          set          |          r+t          | |          }|r|dk    rt          | ||           |S t                               d| |t$          d           t$          S |dk    s|rd	|v rt'          | |pd
|          }|r|S |}|r|dv r|rt)          |          }|r|}|dk    rt+          |           }|r|S |rddlm}  |||           }|r|S t1                      }| |v r||                              dd          S |                                 }t5          t6                                          d d          D ]\  }}||v r|c S |r:t          |          r+t          | |          }|r|dk    rt          | ||           |S t$          S )ac  Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
    5. OpenRouter live API metadata
    6. Nous suffix-match via OpenRouter cache
    7. models.dev registry lookup (provider-aware)
    8. Thin hardcoded defaults (broad family patterns)
    9. Default fallback (128K)
    Nr   )r  r5   rI   u   Could not detect context length for model %r at %s — defaulting to %s tokens (probe-down). Set model.context_length in config.yaml to override.r   r+   rg   zhttps://api.anthropic.com)r-   r!   r   )lookup_models_dev_contextrF   c                 ,    t          | d                   S )Nr   r   )xs    r?   <lambda>z*get_model_context_length.<locals>.<lambda>r  s    s1Q4yy rA   T)r   reverse)r   r   r@   rC  re   rv   r&  r   r   nextiterr   ro   r   rk  rA  r  r@  DEFAULT_FALLBACK_CONTEXTrp  rt   rw  agent.models_devrz  r  r9   sortedDEFAULT_CONTEXT_LENGTHS)r0   rW   r  rx  rs   r  endpoint_metadatamatchedr   r   rI   	local_ctxri  effective_providerinferredrz  rr  ru  default_modelr6  s                       r?   get_model_context_lengthr     s   , (Z8Ms-S-S(XmpqXqXq$$
 #5))E  *5(;;M 8$$ ,-H-R-R ,9(GTTT#''.. 		$%%**t$5$<$<$>$>??@@ #4"9"9";";  JC||se||"' (4  	&$[[)9::N.#.. &%%*844 	, ** %7xHH	 %Q'xCCC$$KK. x$<!@!@	   ,+ ; (H44-eX5\A\^eff 	J " .!37O!O!O 	./99H .%-"V##*511 	J >>>>>>''(:EBB 	J $%%H""#3V<<< ++--K!'%%''-@-@$" " "  v K''MMM (  %h// /x@@	 	Qx;;; $#rA   r   c                 .    | sdS t          |           dz  S )z<Rough token estimate (~4 chars/token) for pre-flight checks.r   ry   r|  )r   s    r?   estimate_tokens_roughr    s     qt99>rA   messagesc                 >    t          d | D                       }|dz  S )z:Rough token estimate for a message list (pre-flight only).c              3   N   K   | ] }t          t          |                    V  !d S rb   r   r   r   msgs     r?   r   z1estimate_messages_tokens_rough.<locals>.<genexpr>  s.      88c#c((mm888888rA   ry   )sum)r  total_charss     r?   estimate_messages_tokens_roughr    s)    88x88888K!rA   )system_prompttoolsr  r  c                    d}|r|t          |          z  }| r|t          d | D                       z  }|r|t          t          |                    z  }|dz  S )u<  Rough token estimate for a full chat-completions request.

    Includes the major payload buckets Hermes sends to providers:
    system prompt, conversation messages, and tool schemas.  With 50+
    tools enabled, schemas alone can add 20-30K tokens — a significant
    blind spot when only counting messages.
    r   c              3   N   K   | ] }t          t          |                    V  !d S rb   r  r  s     r?   r   z0estimate_request_tokens_rough.<locals>.<genexpr>  s.      ==S3s3xx========rA   ry   )r   r  r   )r  r  r  r  s       r?   estimate_request_tokens_roughr    su     K *s=))) >s==H====== 's3u::&!rA   )r   r   )F)rY   F)rY   rY   NrY   )M__doc__r  r)  rK  r  pathlibr   typingr   r   r   r   urllib.parser   r  r2  hermes_constantsr	   	getLogger__name__r  	frozensetr/   r   __annotations__compile
IGNORECASEr:   r@   rB   rC   rb  r  rD   rE   r  rF  r  r  r   r   r   r]   rc   r`   re   rj   rt   rv   r   r   r   r   r   tupler   r   r   r   r   r  r&  r-  r5  rA  rC  rH  rR  rW  rk  rn  rp  rw  r  r  r  r  r   rA   r?   <module>r     s	      				 				        , , , , , , , , , , , , ! ! ! ! ! !   2 2 2 2 2 2		8	$	$
 &/Y 	0 	0 	0 	& 	& IcN 	 	 	 !bjLM  # #    & 46 tCc3h/0 5 5 5$% E % % % GI S$sDcN/B*C%C D I I I8: #T#u*%5 : : : 
    /q1 &
 w& & w& & f& w& V& V&  g!&$ %&( V)&, F-&0 v1&4 
65&8 F9&< f=&> F?&@ "'"#)$ %K& & & P   <0# 0# 0 0 0 0Dc Dd D D D DH# H$ H H H H
$h$8$ $ 	$
 }$ M$ 9$ i$ "9$ \$ ($ %f$ 
$ Y$ 	$  !$ $sCx.   (s x}    $:# :$ : : : :! ! ! ! ! !H8s 8x} 8 8 8 8v0c 0 0 0 0 #  S ZbcfZg    	S#X 	eCHo 	(SV- 	 	 	 	=T#s(^ = = = = ==DcN =x} = = = =d38n c3h    .,d3S#X#67 ,3 ,tTWY\T\~ ,bf , , , ,!+ !+ !+c4S>>Q9R !+ !+ !+ !+L X XXX X 
#tCH~
	X X X Xv5 5 5 5 5T#s(^    Cs Cc C3 C4 C C C C*S C HSM         c hsm    8C s t    &Ws Wc Whsm W W W Wt#C #C # # # #3 #  PXY\P]    >     F (,$ $$$ $ :	$
 $ 	$ $ $ $D     T$sCx.-A c     ,0	  4S>"  Dc3h()	
 	     rA   