
    Fij=/                        U d Z ddlmZ ddlZddlmZmZmZ g dZde	d<    e
h d          Zi ad	e	d
<   ddZ e             dddZdddZg dZdS )u  Shared threat-pattern library for context window security scanning.

This module is the single source of truth for prompt-injection / promptware /
exfiltration patterns used across the context-assembly scanners
(``agent/prompt_builder.py``, ``tools/memory_tool.py``) and the tool-result
delimiter system in ``agent/tool_dispatch_helpers.py``.

Pattern philosophy
------------------
Patterns are organized by ATTACK CLASS, not by source file.  Each pattern
is a ``(regex, pattern_id, scope)`` tuple, where ``scope`` controls which
scanners use it:

- ``"all"``  — applied everywhere (classic prompt injection, exfiltration)
- ``"context"`` — applied to context files + memory + tool results
  (promptware / C2 / behavioral hijack; broader detection)
- ``"strict"`` — applied to memory writes + skill installs only
  (aggressive checks acceptable for user-curated content but too noisy
  for tool results)

The split exists because tool results contain web pages, GitHub issues,
and MCP responses — content the user did not author — and we want broad
detection there, but blocking is reserved for paths where the user can
intervene (memory writes, skill installs).

Pattern anchoring
-----------------
New patterns anchor on **C2-specific vocabulary or unambiguous attack
behavior**, NOT on bossy English.  Phrases like "you are obligated to"
or "you must" alone are too common in legitimate instruction-writing
(see AGENTS.md, CLAUDE.md, etc.) to flag.  See the pattern comments for
the rationale on borderline cases.

Multi-word bypass
-----------------
Patterns use ``(?:\w+\s+)*`` between key tokens to prevent attackers
from inserting filler words (e.g. "ignore all prior instructions" instead
of "ignore all instructions").  This mirrors the fix applied to
``skills_guard.py`` in commit 4ea29978.
    )annotationsN)ListOptionalTuple)$)zHignore\s+(?:\w+\s+)*(previous|all|above|prior)\s+(?:\w+\s+)*instructionsprompt_injectionall)zsystem\s+prompt\s+overridesys_prompt_overrider   )zRdisregard\s+(?:\w+\s+)*(your|all|any)\s+(?:\w+\s+)*(instructions|rules|guidelines)disregard_rulesr   )zwact\s+as\s+(if|though)\s+(?:\w+\s+)*you\s+(?:\w+\s+)*(have\s+no|don\'t\s+have)\s+(?:\w+\s+)*(restrictions|limits|rules)bypass_restrictionsr   )z9<!--[^>]*(?:ignore|override|system|secret|hidden)[^>]*-->html_comment_injectionr   )z5<\s*div\s+style\s*=\s*["\'][\s\S]*?display\s*:\s*none
hidden_divr   )z5translate\s+.*\s+into\s+.*\s+and\s+(execute|run|eval)translate_executer   )z2do\s+not\s+(?:\w+\s+)*tell\s+(?:\w+\s+)*the\s+userdeception_hider   )z,you\s+are\s+(?:\w+\s+)*now\s+(?:a|an|the)\s+role_hijackcontext)z+pretend\s+(?:\w+\s+)*(you\s+are|to\s+be)\s+role_pretendr   )z-output\s+(?:\w+\s+)*(system|initial)\s+promptleak_system_promptr   )zW(respond|answer|reply)\s+without\s+(?:\w+\s+)*(restrictions|limitations|filters|safety)remove_filtersr   )z>you\s+have\s+been\s+(?:\w+\s+)*(updated|upgraded|patched)\s+tofake_updater   )z\bname\s+yourself\s+\w+identity_overrider   )zregister\s+(as\s+)?a?\s*nodec2_node_registrationr   )z0(heartbeat|beacon|check[\s\-]?in)\s+(to|with)\s+c2_heartbeatr   )z,pull\s+(down\s+)?(?:new\s+)?task(?:ing|s)?\bc2_task_pullr   )zconnect\s+to\s+the\s+network\bc2_network_connectr   )z>you\s+must\s+(?:\w+\s+){0,3}(register|connect|report|beacon)\bforced_actionr   )z only\s+use\s+one[\s\-]?liners?\banti_forensic_onelinerr   )zRnever\s+(?:\w+\s+)*(?:create|write)\s+(?:\w+\s+)*(?:script|file)\s+(?:\w+\s+)*diskanti_forensic_diskr   )z<unset\s+\w*(?:CLAUDE|CODEX|HERMES|AGENT|OPENAI|ANTHROPIC)\w*env_var_unset_agentr   )zG\b(?:praxis|cobalt\s*strike|sliver|havoc|mythic|metasploit|brainworm)\bknown_c2_frameworkr   )z1\bc2\s+(?:server|channel|infrastructure|beacon)\bc2_explicitr   )z\bcommand\s+and\s+control\bc2_explicit_longr   )z?curl\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)
exfil_curlr   )z?wget\s+[^\n]*\$\{?\w*(KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|API)
exfil_wgetr   )zAcat\s+[^\n]*(\.env|credentials|\.netrc|\.pgpass|\.npmrc|\.pypirc)read_secretsr   )z6(send|post|upload|transmit)\s+.*\s+(to|at)\s+https?://send_to_urlstrict)z{(include|output|print|share)\s+(?:\w+\s+)*(conversation|chat\s+history|previous\s+messages|full\s+context|entire\s+context)context_exfilr&   )authorized_keysssh_backdoorr&   )z\$HOME/\.ssh|\~/\.ssh
ssh_accessr&   )z'\$HOME/\.hermes/\.env|\~/\.hermes/\.env
hermes_envr&   )zj(update|modify|edit|write|change|append|add\s+to)\s+.*(?:AGENTS\.md|CLAUDE\.md|\.cursorrules|\.clinerules)agent_config_modr&   )zV(update|modify|edit|write|change|append|add\s+to)\s+.*\.hermes/(config\.yaml|SOUL\.md)hermes_config_modr&   )zI(?:api[_-]?key|token|secret|password)\s*[=:]\s*["\'][A-Za-z0-9+/=_-]{20,}hardcoded_secretr&   zList[Tuple[str, str, str]]	_PATTERNS>      ​   ‌   ‍   ‪   ‫   ‬   ‭   ‮   ⁠   ⁢   ⁣   ⁤   ⁦   ⁧   ⁨   ⁩   ﻿z'dict[str, List[Tuple[re.Pattern, str]]]	_COMPILEDreturnNonec                    t           rdS g } g }g }t          D ]\  }}}t          j        |t          j                  }||f}|dk    r@|                     |           |                    |           |                    |           o|dk    r+|                    |           |                    |           |dk    r|                    |           t          d|d|          | ||da dS )a  Compile pattern sets for each scope (all / context / strict).

    A pattern with scope="all" lands in every set.  A pattern with
    scope="context" lands in context + strict (context implies the
    strict scanners want it too).  Scope="strict" lands in strict only.
    Nr   r   r&   zthreat_patterns: unknown scope z for pattern )r   r   r&   )rA   r/   recompile
IGNORECASEappend
ValueError)all_patternscontext_patternsstrict_patternspatternpidscopecompiledentrys           :/home/ubuntu/.hermes/hermes-agent/tools/threat_patterns.py_compilerS      s0     13L5746O( ^ ^e:gr}553E>>&&&##E***""5))))i##E***""5))))h""5))))\u\\UX\\]]] #! III    r   contentstrrO   	List[str]c                Z   | sg S g }t          |           }|t          z  }|D ](}|                    dt          |          d           )t                              |          }|t          d|          |D ]/\  }}|                    |           r|                    |           0|S )u  Return a list of matched pattern IDs in ``content`` at the given scope.

    ``scope`` selects which pattern set to apply:

    - ``"all"`` (narrow): classic injection + exfil only — minimal false
      positives, suitable for any text.
    - ``"context"`` (default): adds promptware / C2 / role-play patterns —
      suitable for context files, memory entries, and tool results.
    - ``"strict"`` (broad): adds persistence / SSH backdoor / exfil-URL
      patterns — appropriate for user-mediated writes (memory tool,
      skills install) where false positives can be resolved interactively.

    Also checks for invisible unicode characters (returned as
    ``"invisible_unicode_U+XXXX"`` so the caller can surface the offending
    codepoint in a log line).
    zinvisible_unicode_U+04XNz scan_for_threats: unknown scope )setINVISIBLE_CHARSrH   ordrA   getrI   search)	rU   rO   findingschar_setinvisible_hitschpatternsrP   rN   s	            rR   scan_for_threatsrd      s    "  	H 7||H/N > ><s2ww<<<==== }}U##HEEEEFFF! ! !#??7## 	!OOC   OrT   r&   Optional[str]c                    t          | |          }|sdS |d         }|                    d          r|                    dd          }d| dS d| d	S )
zReturn a human-readable error string for the first threat found, or None.

    Convenience wrapper used by paths that block on the first hit
    (memory tool writes, skills install) where the caller just needs a
    yes/no + a message.
    )rO   Nr   invisible_unicode_ z6Blocked: content contains invisible unicode character z (possible injection).z)Blocked: content matches threat pattern 'zf'. Content is injected into the system prompt and must not contain injection or exfiltration payloads.)rd   
startswithreplace)rU   rO   r_   rN   	codepoints        rR   first_threat_messagerl      s      u555H t
1+C
~~*++ jKK 4b99	i	iiii	/C 	/ 	/ 	/rT   )r[   rd   rl   )rB   rC   )r   )rU   rV   rO   rV   rB   rW   )r&   )rU   rV   rO   rV   rB   re   )__doc__
__future__r   rE   typingr   r   r   r/   __annotations__	frozensetr[   rA   rS   rd   rl   __all__ rT   rR   <module>rt      s  ' ' 'R # " " " " " 				 ( ( ( ( ( ( ( ( ( (B) B) B)	 B B B BP )     . 68	 7 7 7 7" " " "J 	


% % % % %P    *  rT   