
    j"                     J    d Z ddlmZmZ ddlmZmZmZ  G d de          ZdS )u  Abstract base class for pluggable context engines.

A context engine controls how conversation context is managed when
approaching the model's token limit. The built-in ContextCompressor
is the default implementation. Third-party engines (e.g. LCM) can
replace it via the plugin system or by being placed in the
``plugins/context_engine/<name>/`` directory.

Selection is config-driven: ``context.engine`` in config.yaml.
Default is ``"compressor"`` (the built-in). Only one engine is active.

The engine is responsible for:
  - Deciding when compaction should fire
  - Performing compaction (summarization, DAG construction, etc.)
  - Optionally exposing tools the agent can call (e.g. lcm_grep)
  - Tracking token usage from API responses

Lifecycle:
  1. Engine is instantiated and registered (plugin register() or default)
  2. on_session_start() called when a conversation begins
  3. update_from_response() called after each API response with usage data
  4. should_compress() checked after each turn
  5. compress() called when should_compress() returns True
  6. on_session_end() called at real session boundaries (CLI exit, /reset,
     gateway session expiry) — NOT per-turn
    )ABCabstractmethod)AnyDictListc                      e Zd ZU dZeedefd                        ZdZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d<   dZe	e
d	<   dZe	e
d
<   dZee
d<   dZe	e
d<   dZe	e
d<   edeeef         ddfd            Zed.de	defd            Ze	 	 d/deeeef                  de	dedeeeef                  fd            Zdeeeef                  defdZde	defdZdeeeef                  defdZdeddfdZdedeeeef                  ddfd Zd0d!Z deeeef                  fd"Z!d#ed$eeef         defd%Z"deeef         fd&Z#	 	 	 	 d1d(ed	e	d)ed*ed+ed,eddfd-Z$dS )2ContextEnginez.Base class all context engines must implement.returnc                     dS )z,Short identifier (e.g. 'compressor', 'lcm').N selfs    9/home/ubuntu/.hermes/hermes-agent/agent/context_engine.pynamezContextEngine.name%             r   last_prompt_tokenslast_completion_tokenslast_total_tokensthreshold_tokenscontext_lengthcompression_countg      ?threshold_percent   protect_first_n   protect_last_nusageNc                     dS )a  Update tracked token usage from an API response.

        Called after every LLM call with a normalized usage dict. The legacy
        keys ``prompt_tokens``, ``completion_tokens``, and ``total_tokens``
        are always present. Newer hosts also include canonical buckets:
        ``input_tokens``, ``output_tokens``, ``cache_read_tokens``,
        ``cache_write_tokens``, and ``reasoning_tokens``. Engines should
        treat those fields as optional for compatibility with older hosts.
        Nr   )r   r   s     r   update_from_responsez"ContextEngine.update_from_responseF   r   r   prompt_tokensc                     dS )z0Return True if compaction should fire this turn.Nr   )r   r!   s     r   should_compresszContextEngine.should_compressR   r   r   messagescurrent_tokensfocus_topicc                     dS )u  Compact the message list and return the new message list.

        This is the main entry point. The engine receives the full message
        list and returns a (possibly shorter) list that fits within the
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.

        Args:
            focus_topic: Optional topic string from manual ``/compress <focus>``.
                Engines that support guided compression should prioritise
                preserving information related to this topic.  Engines that
                don't support it may simply ignore this argument.
        Nr   )r   r$   r%   r&   s       r   compresszContextEngine.compressV   r   r   c                     dS )zQuick rough check before the API call (no real token count yet).

        Default returns False (skip pre-flight). Override if your engine
        can do a cheap estimate.
        Fr   r   r$   s     r   should_compress_preflightz'ContextEngine.should_compress_preflightn   s	     ur   rough_tokensc                     dS )a  Return True when preflight should trust recent real usage instead.

        Built-in compression uses this to avoid re-compacting from known-noisy
        rough estimates after a compressed request has already fit. Third-party
        engines can ignore it safely.
        Fr   )r   r,   s     r   $should_defer_preflight_to_real_usagez2ContextEngine.should_defer_preflight_to_real_usagev   s	     ur   c                     dS )u  Quick check: is there anything in ``messages`` that can be compacted?

        Used by the gateway ``/compress`` command as a preflight guard —
        returning False lets the gateway report "nothing to compress yet"
        without making an LLM call.

        Default returns True (always attempt).  Engines with a cheap way
        to introspect their own head/tail boundaries should override this
        to return False when the transcript is still entirely protected.
        Tr   r*   s     r   has_content_to_compressz%ContextEngine.has_content_to_compress   s	     tr   
session_idc                     dS )zCalled when a new conversation session begins.

        Use this to load persisted state (DAG, store) for the session.
        kwargs may include hermes_home, platform, model, etc.
        Nr   )r   r1   kwargss      r   on_session_startzContextEngine.on_session_start   r   r   c                     dS )u   Called at real session boundaries (CLI exit, /reset, gateway expiry).

        Use this to flush state, close DB connections, etc.
        NOT called per-turn — only when the session truly ends.
        Nr   )r   r1   r$   s      r   on_session_endzContextEngine.on_session_end   r   r   c                 >    d| _         d| _        d| _        d| _        dS )zyCalled on /new or /reset. Reset per-session state.

        Default resets compression_count and token tracking.
        r   N)r   r   r   r   r   s    r   on_session_resetzContextEngine.on_session_reset   s*    
 #$&'#!"!"r   c                     g S )zReturn tool schemas this engine provides to the agent.

        Default returns empty list (no tools). LCM would return schemas
        for lcm_grep, lcm_describe, lcm_expand here.
        r   r   s    r   get_tool_schemaszContextEngine.get_tool_schemas   s	     	r   r   argsc                 >    ddl }|                    dd| i          S )zHandle a tool call from the agent.

        Only called for tool names returned by get_tool_schemas().
        Must return a JSON string.

        kwargs may include:
          messages: the current in-memory message list (for live ingestion)
        r   NerrorzUnknown context engine tool: )jsondumps)r   r   r;   r3   r>   s        r   handle_tool_callzContextEngine.handle_tool_call   s-     	zz7$JD$J$JKLLLr   c                     | j         | j        | j        | j        r t          d| j         | j        z  dz            nd| j        dS )zsReturn status dict for display/logging.

        Default returns the standard fields run_agent.py expects.
        d   r   )r   r   r   usage_percentr   )r   r   r   minr   r   s    r   
get_statuszContextEngine.get_status   sZ     #'"9 $ 5"1 &.C043FFLMMM,-!%!7	
 	
 		
r    modelbase_urlapi_keyproviderapi_modec                 L    || _         t          || j        z            | _        dS )a  Called when the user switches models or on fallback activation.

        Default updates context_length and recalculates threshold_tokens
        from threshold_percent. Override if your engine needs more
        (e.g. recalculate DAG budgets, switch summary models).
        N)r   intr   r   )r   rG   r   rH   rI   rJ   rK   s          r   update_modelzContextEngine.update_model   s*     - #NT5K$K L Lr   )N)NN)r
   N)rF   rF   rF   rF   )%__name__
__module____qualname____doc__propertyr   strr   r   rM   __annotations__r   r   r   r   r   r   floatr   r   r   r   r    boolr#   r   r(   r+   r.   r0   r4   r6   r8   r:   r@   rE   rN   r   r   r   r	   r	       s        88 ;c ; ; ; ^ X;  "#C###scNCs  $u###OSNC 	$sCx. 	T 	 	 	 ^	 ? ?S ?D ? ? ? ^?  #	 tCH~&  	
 
d38n	   ^.$tCH~2F 4         T#s(^0D     3 T     T#s(^8L QU    # # # #$tCH~"6    
MS 
MS#X 
MS 
M 
M 
M 
M
DcN 
 
 
 
, M MM M 	M
 M M M 
M M M M M Mr   r	   N)	rR   abcr   r   typingr   r   r   r	   r   r   r   <module>rZ      s    6 $ # # # # # # # " " " " " " " " " "BM BM BM BM BMC BM BM BM BM BMr   