
    j\L                        d Z ddlZddlZddlZ ej        e          Z eh d          Z eh d          Z	 ej
        dd                                          dv Zg dZd	Z ej        d
e d          ZdZ ej        de dej                  Z ej        dej                  Z ej        d          Z ej        d          Z ej        dej                  Z ej        d          Z ej        d          Z ej        d          Z ej        d          Z ej        dej                  Z ej        d          Z ej        dd                    e          z   dz             Zdddddd d!e d"e!d#e!d$e!d%e d&e d'e fd(Z"d)e d'e fd*Z#d+e d'e fd,Z$d-e d'e fd.Z%d-e d'e fd/Z&d-e d'e fd0Z'd-e d'e fd1Z(d2d2d3d-e d4e)d5e)d'e fd6Z*d7e d'e fd8Z+ e,d9 eD                       Z-d-e d'e)fd:Z.d;Z/d-e d'e)fd<Z0 G d= d>ej1                  Z2dS )?a2  Regex-based secret redaction for logs and tool output.

Applies pattern matching to mask API keys, tokens, and credentials
before they reach log files, verbose output, or gateway logs.

Short tokens (< 18 chars) are fully masked. Longer tokens preserve
the first 6 and last 4 characters for debuggability.
    N>   x-amz-signaturejwtkeyauthcodetokenapikeysecretapi_keysessionid_tokenpassword	signatureaccess_tokenclient_secretrefresh_token>   r   r   r   r   r	   r
   r   r   r   private_keyr   authorizationr   r   HERMES_REDACT_SECRETStrue>   1onyesr   )$zsk-[A-Za-z0-9_-]{10,}zghp_[A-Za-z0-9]{10,}zgithub_pat_[A-Za-z0-9_]{10,}zgho_[A-Za-z0-9]{10,}zghu_[A-Za-z0-9]{10,}zghs_[A-Za-z0-9]{10,}zghr_[A-Za-z0-9]{10,}zxox[baprs]-[A-Za-z0-9-]{10,}zAIza[A-Za-z0-9_-]{30,}zpplx-[A-Za-z0-9]{10,}zfal_[A-Za-z0-9_-]{10,}zfc-[A-Za-z0-9]{10,}zbb_live_[A-Za-z0-9_-]{10,}zgAAAA[A-Za-z0-9_=-]{20,}zAKIA[A-Z0-9]{16}zsk_live_[A-Za-z0-9]{10,}zsk_test_[A-Za-z0-9]{10,}zrk_live_[A-Za-z0-9]{10,}zSG\.[A-Za-z0-9_-]{10,}zhf_[A-Za-z0-9]{10,}zr8_[A-Za-z0-9]{10,}znpm_[A-Za-z0-9]{10,}zpypi-[A-Za-z0-9_-]{10,}zdop_v1_[A-Za-z0-9]{10,}zdoo_v1_[A-Za-z0-9]{10,}zam_[A-Za-z0-9_-]{10,}zsk_[A-Za-z0-9_]{10,}ztvly-[A-Za-z0-9]{10,}zexa_[A-Za-z0-9]{10,}zgsk_[A-Za-z0-9]{10,}zsyt_[A-Za-z0-9]{10,}zretaindb_[A-Za-z0-9]{10,}zhsk-[A-Za-z0-9]{10,}zmem0_[A-Za-z0-9]{10,}zbrv_[A-Za-z0-9]{10,}zxai-[A-Za-z0-9]{30,}z9(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)z([A-Z0-9_]{0,50}z&[A-Z0-9_]{0,50})\s*=\s*(['\"]?)(\S+)\2z(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer|secret_value|raw_secret|secret_input|key_material)z("z")\s*:\s*"([^"]+)"z!(Authorization:\s*Bearer\s+)(\S+)z#(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})zH-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----zK((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)z2eyJ[A-Za-z0-9_-]{10,}(?:\.[A-Za-z0-9_=-]{4,}){0,2}z (\+[1-9]\d{6,14})(?![A-Za-z0-9])z;(https?|wss?|ftp)://([^\s/?#]+)([^\s?#]*)\?([^\s#]+)(#\S*)?z+(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@z`\b((?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS|TRACE|CONNECT)\s+[^ \t\r\n\"']*?)\?([^ \t\r\n\"']+)zH^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$z(?<![A-Za-z0-9_-])(|z)(?![A-Za-z0-9_-])      *** )headtailfloorplaceholderemptyvaluer   r    r!   r"   r#   returnc                d    | s|S t          |           |k     r|S | d|          d| | d          S )u  Mask a secret for display, preserving ``head`` and ``tail`` characters.

    Canonical helper for display-time redaction across Hermes — used by
    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
    a secret needs to be shown truncated for debuggability while still
    keeping the bulk hidden.

    Args:
        value:       The secret to mask. ``None``/empty returns ``empty``.
        head:        Leading characters to preserve. Default 4.
        tail:        Trailing characters to preserve. Default 4.
        floor:       Values shorter than ``head + tail + floor_margin`` are
                     fully masked (returns ``placeholder``). Default 12 —
                     matches the existing config/status/dump convention.
        placeholder: Value returned for too-short inputs. Default ``"***"``.
        empty:       Value returned when ``value`` is falsy (None, ""). The
                     caller can override this to e.g. ``color("(not set)",
                     Colors.DIM)`` for user-facing display.

    Examples:
        >>> mask_secret("sk-proj-abcdef1234567890")
        'sk-p...7890'
        >>> mask_secret("short")                         # fully masked
        '***'
        >>> mask_secret("")                              # empty default
        ''
        >>> mask_secret("", empty="(not set)")           # empty override
        '(not set)'
        >>> mask_secret("long-token", head=6, tail=4, floor=18)
        '***'
    Nz...)len)r$   r   r    r!   r"   r#   s         1/home/ubuntu/.hermes/hermes-agent/agent/redact.pymask_secretr)      sO    P  
5zzEETEl..udUVV}...    r   c                 0    | sdS t          | ddd          S )uO   Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix.r      r      )r   r    r!   )r)   )r   s    r(   _mask_tokenr.      s(      uu11B7777r*   queryc                 `   | s| S g }|                      d          D ]~}d|vr|                    |           |                    d          \  }}}|                                t          v r|                    | d           i|                    |           d                    |          S )a   Redact sensitive parameter values in a URL query string.

    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
    replaced with `***`. Non-sensitive keys pass through unchanged.
    Empty or malformed pairs are preserved as-is.
    &=z=***)splitappend	partitionlower_SENSITIVE_QUERY_PARAMSjoin)r/   partspairr   _r$   s         r(   _redact_query_stringr<      s      EC    d??LLs++Q99;;111LLC&&&&LL88E??r*   textc                 f    dt           j        dt          fd}t                              ||           S )u   Scan text for URLs with query strings and redact sensitive params.

    Catches opaque tokens that don't match vendor prefix regexes, e.g.
    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
    mr%   c                    |                      d          }|                      d          }|                      d          }t          |                      d                    }|                      d          pd}| d| | d| | S )	N         r      r   ://?groupr<   )r?   scheme	authoritypathr/   fragments         r(   _subz&_redact_url_query_params.<locals>._sub  s    GGAJJ	wwqzz$QWWQZZ00771::#@@Y@@@u@h@@@r*   )reMatchstr_URL_WITH_QUERY_REsubr=   rM   s     r(   _redact_url_query_paramsrT     sD    A AS A A A A !!$---r*   c                 :    t                               d |           S )zStrip `user:password@` from HTTP/WS/FTP URLs.

    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
    separately by `_DB_CONNSTR_RE`.
    c                 ^    |                      d           d|                      d           dS )NrA   rE   rB   z:***@rH   r?   s    r(   <lambda>z&_redact_url_userinfo.<locals>.<lambda>(  s+    QWWQZZ55AGGAJJ555 r*   )_URL_USERINFO_RErR   r=   s    r(   _redact_url_userinfor\   !  s%     55  r*   c                 f    dt           j        dt          fd}t                              ||           S )zARedact sensitive query params in HTTP access-log request targets.r?   r%   c                 ~    |                      d          }t          |                      d                    }| d| S )NrA   rB   rF   rG   )r?   prefixr/   s      r(   rM   z6_redact_http_request_target_query_params.<locals>._sub/  s<    $QWWQZZ00""5"""r*   )rN   rO   rP   _HTTP_REQUEST_TARGET_QUERY_RErR   rS   s     r(   (_redact_http_request_target_query_paramsra   -  s>    # #S # # # # ),,T4888r*   c                     | rd| v sd| vr| S t                               |                                           s| S t          |                                           S )uN  Redact sensitive values in a form-urlencoded body.

    Only applies when the entire input looks like a pure form body
    (k=v&k=v with no newlines, no other text). Single-line non-form
    text passes through unchanged. This is a conservative pass — the
    `_redact_url_query_params` function handles embedded query strings.
    
r1   )_FORM_BODY_REmatchstripr<   r[   s    r(   _redact_form_bodyrg   6  sZ      44<<3d??tzz||,, 

---r*   F)force	code_filerh   ri   c                "   | dS t          | t                    st          |           } | s| S |s	t          s| S t          |           rt                              d |           } |sHd| v rd }t                              ||           } d| v r"d| v rd }t                              ||           } d| v sd	| v rt                              d
 |           } d| v rd }t                              ||           } d| v rd| v rt                              d|           } d| v rt                              d |           } d| v rt                              d |           } d| v rd| v rt          |           } d| v rd }t                              ||           } | S )u  Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
    Enabled by default. Disable via security.redact_secrets: false in config.yaml.
    Set force=True for safety boundaries that must never return raw secrets
    regardless of the user's global logging redaction preference.

    Set code_file=True to skip the ENV-assignment and JSON-field regex
    patterns when the text is known to be source code (e.g. MAX_TOKENS=***
    constants, "apiKey": "test" fixtures). Prefix patterns, auth headers,
    private keys, DB connstrings, JWTs, and URL secrets are still redacted.

    Performance: each regex pattern is gated behind a cheap substring
    pre-check (e.g. ``"=" in text`` for ENV assignments, ``"://" in text``
    for URLs, ``"eyJ" in text`` for JWTs). On a typical hermes log line
    (no secrets) this drops the 13-pattern scan from ~5.6us to ~1.8us per
    record (-68%). The pre-checks are conservative — false positives
    still run the full regex, which then doesn't match. False negatives
    are impossible because every regex requires the gated substring to
    match.
    Nc                 F    t          |                     d                    S )NrA   r.   rH   rX   s    r(   rY   z'redact_sensitive_text.<locals>.<lambda>g  s    AGGAJJ(?(? r*   r2   c                     |                      d          |                      d          |                      d          }}}| d| t          |           | S )NrA   rB   rC   r2   rH   r.   )r?   namequoter$   s       r(   _redact_envz*redact_sensitive_text.<locals>._redact_envl  sS    %&WWQZZQWWQZZUeCCCE(:(:CECCCr*   :"c                     |                      d          |                      d          }}| dt          |           dS )NrA   rB   z: "rs   rn   )r?   r   r$   s      r(   _redact_jsonz+redact_sensitive_text.<locals>._redact_jsons  s>    WWQZZU77+e"4"47777r*   uthorizationUTHORIZATIONc                 r    |                      d          t          |                      d                    z   S )NrA   rB   rn   rX   s    r(   rY   z'redact_sensitive_text.<locals>.<lambda>}  s'    aggajj;qwwqzz#:#:: r*   c                 h    |                      d          pd}|                      d          }| | dS )NrA   r   rB   z:***rW   )r?   r_   digitss      r(   _redact_telegramz/redact_sensitive_text.<locals>._redact_telegram  s9    WWQZZ%2FWWQZZF*f****r*   BEGINz-----z[REDACTED PRIVATE KEY]rE   c                 \    |                      d           d|                      d           S )NrA   r   rC   rW   rX   s    r(   rY   z'redact_sensitive_text.<locals>.<lambda>  s(    qwwqzz,J,Jaggajj,J,J r*   eyJc                 F    t          |                     d                    S )Nr   rl   rX   s    r(   rY   z'redact_sensitive_text.<locals>.<lambda>  s    [%<%< r*   r1   +c                     |                      d          }t          |          dk    r|d d         dz   |dd          z   S |d d         dz   |dd          z   S )NrA      rB   z****r   )rH   r'   )r?   phones     r(   _redact_phonez,redact_sensitive_text.<locals>._redact_phone  s`    GGAJJE5zzQRaRy6)E"##J66!9v%bcc
22r*   )
isinstancerP   _REDACT_ENABLED_has_known_prefix_substring
_PREFIX_RErR   _ENV_ASSIGN_RE_JSON_FIELD_RE_AUTH_HEADER_RE_TELEGRAM_RE_PRIVATE_KEY_RE_DB_CONNSTR_RE_JWT_RErg   _SIGNAL_PHONE_RE)r=   rh   ri   rq   ru   r{   r   s          r(   redact_sensitive_textr   F  s   , |tdC   4yy  _  #4(( G~~??FF  :$;;D D D "%%k488D $;;3$;;8 8 8 "%%lD99D
 4!7!7""::
 
 d{{	+ 	+ 	+  0$77 $7d??""#;TBB }}!!"J"JDQQ }}{{<<dCC d{{sd{{ && d{{	3 	3 	3
  ##M488Kr*   patternc                 T    d}t          |           D ]\  }}||v r| d|         c S | S )aG  Return the leading literal characters of a regex pattern.

    Stops at the first regex metacharacter (``[``, ``(``, ``\``, ``.``,
    ``?``, ``*``, ``+``, ``|``, ``{``, ``^``, ``$``).  Returns the literal
    that any match of the pattern MUST contain as a substring, so the
    pre-screen never produces false negatives.
    z[(\.?*+|{^$N)	enumerate)r   metaichs       r(   _extract_literal_prefixr     sK     D7##  2::2A2; Nr*   c              #   4   K   | ]}t          |          V  d S N)r   ).0ps     r(   	<genexpr>r     s<        #$A     r*   c                 D     t           fdt          D                       S )zReturn True if ``text`` contains any known credential prefix substring.

    Used as a cheap pre-check before invoking the expensive ``_PREFIX_RE``.
    c              3       K   | ]}|v V  	d S r    )r   r   r=   s     r(   r   z._has_known_prefix_substring.<locals>.<genexpr>  s'      55QqDy555555r*   )any_PREFIX_SUBSTRINGSr[   s   `r(   r   r     s)    
 5555"4555555r*   )	zGET zPOST zPUT zPATCH zDELETE zHEAD zOPTIONS zTRACE zCONNECT c                 l    |                                  t          fdt          D                       S )z?Cheap pre-check before scanning for access-log request targets.c              3       K   | ]}|v V  	d S r   r   )r   methoduppers     r(   r   z-_has_http_method_substring.<locals>.<genexpr>  s'      EE6vEEEEEEr*   )r   r   _HTTP_METHOD_SUBSTRINGS)r=   r   s    @r(   _has_http_method_substringr     s4    JJLLEEEEE-DEEEEEEr*   c                   B     e Zd ZdZd fd	Zdej        def fdZ xZ	S )	RedactingFormatterz9Log formatter that redacts secrets from all log messages.N%c                 @     t                      j        |||fi | d S r   )super__init__)selffmtdatefmtstylekwargs	__class__s        r(   r   zRedactingFormatter.__init__  s,    gu7777777r*   recordr%   c                 d    t                                          |          }t          |          S r   )r   formatr   )r   r   originalr   s      r(   r   zRedactingFormatter.format  s&    77>>&))$X...r*   )NNr   )
__name__
__module____qualname____doc__r   logging	LogRecordrP   r   __classcell__)r   s   @r(   r   r     sr        CC8 8 8 8 8 8/W. /3 / / / / / / / / / /r*   r   )3r   r   osrN   	getLoggerr   logger	frozensetr7   _SENSITIVE_BODY_KEYSgetenvr6   r   _PREFIX_PATTERNS_SECRET_ENV_NAMEScompiler   _JSON_KEY_NAMES
IGNORECASEr   r   r   r   r   r   r   rQ   rZ   r`   rd   r8   r   rP   intr)   r.   r<   rT   r\   ra   rg   boolr   r   tupler   r   r   r   	Formatterr   r   r*   r(   <module>r      s     				 						8	$	$
 $) % % %   , !y " " "   4 ")3V<<BBDDHbb% % % P Q U+UUU 
 Z-/---M  "*(M  rz* 
 "*O  RM  "*%  2:ABB 
  RZ   2:2   !+
M! !  
O 
 RZSXX&6777:OO 
 ,/ ,/ ,/,/ ,/ 	,/
 ,/ ,/ ,/ 	,/ ,/ ,/ ,/^8s 8s 8 8 8 8     ,.3 .3 . . . . 	s 	s 	 	 	 	93 93 9 9 9 9.C .C . . . .  7<u e e e et e eY\ e e e efS S     U  (8     
6c 6d 6 6 6 6
 FS FT F F F F/ / / / /* / / / / /r*   