+
    iD&                      a  0 t $ R t^ RIHt ^ RIt^ RIt^ RIt^ RIt^ RIt^ RI	H
t
 ^ RIHtHtHtHtHt ^ RIHt ^ RIHt ]P*                  ! ]4      tRRR	. R
. /tRt]P4                  ! 4       tRsR]R&   RsR]R&   RsR]R&   R R lt  ! R R]!4      t"R R lt#R R lt$R R lt%R)R R llt&R)R R  llt'R! R" lt(R# R$ lt)R% R& lt*R)R' R( llt+R# )*a  Website access policy helpers for URL-capable tools.

This module loads a user-managed website blocklist from ~/.hermes/config.yaml
and optional shared list files. It is intentionally lightweight so web/browser
tools can enforce URL policy without pulling in the heavier CLI config stack.

Policy is cached in memory with a short TTL so config changes take effect
quickly without re-reading the file on every URL check.
)annotationsN)Path)AnyDictListOptionalTuple)urlparseget_hermes_homeenabledFdomainsshared_filesg      >@zOptional[Dict[str, Any]]_cached_policyOptional[str]_cached_policy_pathg        float_cached_policy_timec                   V ^8  d   QhRR/# )   returnr    )formats   "1/home/ubuntu/hermes-agent/tools/website_policy.py__annotate__r   )   s     - -$ -    c                 $    \        4       R ,          # )zconfig.yamlr
   r   r   r   _get_default_config_pathr   )   s    },,r   c                      ] tR t^-tRtRtR# )WebsitePolicyErrorz/Raised when a website policy file is malformed.r   N)__name__
__module____qualname____firstlineno____doc____static_attributes__r   r   r   r   r   -   s    9r   r   c                    V ^8  d   QhRRRR/# )r   hoststrr   r   )r   s   "r   r   r   1   s     4 4# 4# 4r   c                p    T ;'       g    R P                  4       P                  4       P                  R4      # ) .)striplowerrstrip)r'   s   &r   _normalize_hostr/   1   s+    JJB%%'..s33r   c                    V ^8  d   QhRRRR/# )r   ruler   r   r   r   )r   s   "r   r   r   5   s      # - r   c                   \        V \        4      '       g   R # V P                  4       P                  4       pV'       d   VP	                  R4      '       d   R # RV9   d,   \        V4      pVP                  ;'       g    VP                  pVP                  R^4      ^ ,          P                  4       P                  R4      pVP	                  R4      '       d
   VR,          pT;'       g    R # )N#:///r+   zwww.:   NN)

isinstancer(   r,   r-   
startswithr	   netlocpathsplitr.   )r1   valueparseds   &  r   _normalize_ruler>   5   s    dC  JJL EE$$S))~%,,KKQ"((*11#6Eb	==Dr   c                    V ^8  d   QhRRRR/# )r   r:   r   r   z	List[str]r   )r   s   "r   r   r   D   s      T i r   c                    V P                  RR7      p. pTP                  4        FZ  pTP                  4       pT'       d   TP                  R4      '       d   K4  \        T4      pT'       g   KI  TP                  T4       K\  	  T#   \         d    \        P                  RT 4       . u # \        \
        3 d#   p\        P                  RY4       . u Rp?# Rp?ii ; i)u   Load rules from a shared blocklist file.

Missing or unreadable files log a warning and return an empty list
rather than raising — a bad file path should not disable all web tools.
utf-8encodingz.Shared blocklist file not found (skipping): %sz6Failed to read shared blocklist file %s (skipping): %sNr3   )	read_textFileNotFoundErrorloggerwarningOSErrorUnicodeDecodeError
splitlinesr,   r8   r>   append)r:   rawexcruleslinestripped
normalizeds   &      r   _iter_blocklist_file_rulesrR   D   s    nngn. E ::<8..s33$X.
:LL$ ! L  GN	'( OQU[	s)   B #C+C:C;CCCc                    V ^8  d   QhRRRR/# r   config_pathOptional[Path]r   zDict[str, Any]r   )r   s   "r   r   r   ^   s     # #^ #~ #r   c                   T ;'       g    \        4       p V P                  4       '       g   \        \        4      #  ^ RIp \        T RR7      ;_uu_ 4       pTP                  T4      ;'       g    / pRRR4       \        X\        4      '       g   \        R4      hTP                  R	/ 4      pTf   / p\        T\        4      '       g   \        R
4      hTP                  R/ 4      pTf   / p\        T\        4      '       g   \        R4      h\        \        4      pTP                  T4       T#   \
         d(    \        P                  R4       \        \        4      u # i ; i  + '       g   i     L; i  TP                   d   p\        RT  RT 24      ThRp?i\         d   p\        RT  RT 24      ThRp?ii ; i)    Nu3   PyYAML not installed — website blocklist disabledrA   rB   zInvalid config YAML at z: zFailed to read config file zconfig root must be a mappingsecurityzsecurity must be a mappingwebsite_blocklistz,security.website_blocklist must be a mapping)r   existsdict_DEFAULT_WEBSITE_BLOCKLISTyamlImportErrorrF   debugopen	safe_load	YAMLErrorr   rH   r7   getupdate)rU   r^   fconfigrM   rY   rZ   policys   &       r   _load_policy_configri   ^   s   ;;!9!;K.//0
^+00A^^A&,,"F 1 fd## !@AAzz*b)Hh%% !=>> %8"= '.. !OPP,-F
MM#$M9  0JK.//0
 10>> Z #:;-r#!OPVYY ^ #>{m2cU!STZ]]^sY   D.  E6 E#1E6 ./E E #E3	.E6 3E6 6F>FF>&F>'F99F>c                    V ^8  d   QhRRRR/# rT   r   )r   s   "r   r   r      s     D D D. Dr   c           
        V '       d   \        V 4      MRp\        P                  ! 4       pV fQ   \        ;_uu_ 4        \        e2   \
        V8X  d'   V\        ,
          \        8  d   \        uuRRR4       # RRR4       T ;'       g    \        4       p \        V 4      pVP                  R. 4      ;'       g    . p\        V\        4      '       g   \        R4      hVP                  R. 4      ;'       g    . p\        V\        4      '       g   \        R4      hVP                  RR4      p\        V\        4      '       g   \        R	4      h. p\        4       pV FK  p	\!        V	4      p
V
'       g   K  R
V
3V9  g   K#  VP#                  RV
RR
/4       VP%                  R
V
34       KM  	  V F  p\        V\         4      '       d   VP'                  4       '       g   K1  \)        V4      P+                  4       pVP-                  4       '       g    \/        4       V,          P1                  4       p\3        V4       FG  p
\        V4      V
3pW9   d   K  VP#                  RV
R\        V4      /4       VP%                  V4       KI  	  K  	  RVRV/pV \        4       8X  d    \        ;_uu_ 4        VsRsVsRRR4       V# V#   + '       g   i     ELK; i  + '       g   i     T# ; i)zLoad and return the parsed website blocklist policy.

Results are cached for ``_CACHE_TTL_SECONDS`` to avoid re-reading
config.yaml on every URL check.  Pass an explicit ``config_path``
to bypass the cache (used by tests).
__default__Nr   z1security.website_blocklist.domains must be a listr   z6security.website_blocklist.shared_files must be a listr   Tz4security.website_blocklist.enabled must be a booleanrg   patternsourcerN   )r(   time	monotonic_cache_lockr   r   r   _CACHE_TTL_SECONDSr   ri   rd   r7   listr   boolsetr>   rK   addr,   r   
expanduseris_absoluter   resolverR   )rU   resolved_pathnowrh   raw_domainsraw_shared_filesr   rN   seenraw_rulerQ   shared_filer:   keyresults   &              r   load_website_blocklistr      s_    )4C$M
..
C [*'=8..2DD% [[ ;;!9!;K -F**Y+11rKk4(( !TUUzz."5;;&-- !YZZjjD)Ggt$$ !WXX"$E!$D$X.
:8Z0<LL)Z8DEHHh
+,	   (+s++;3D3D3F3FK ++-!!#%,557D4T:Jt9j)C{LL)Z3t9EFHHSM ; ( '51F .00[#N"/"% 
 M6Mm [[b [
 Ms   /J9&K9K
	K	c                   V ^8  d   QhRR/# )r   r   Noner   )r   s   "r   r   r      s      $ r   c                 `    \         ;_uu_ 4        RsRRR4       R#   + '       g   i     R# ; i)z?Force the next ``check_website_access`` call to re-read config.N)rq   r   r   r   r   invalidate_cacher      s     
 
s   -	c               $    V ^8  d   QhRRRRRR/# )r   r'   r(   rm   r   rt   r   )r   s   "r   r   r      s!     ; ;3 ; ; ;r   c                    V '       d	   V'       g   R # VP                  R4      '       d   \        P                  ! W4      # W8H  ;'       g    V P                  RV 24      # )Fz*.r+   )r8   fnmatchendswith)r'   rm   s   &&r   _match_host_against_ruler      sI    w$t--?::dmmayM::r   c                    V ^8  d   QhRRRR/# )r   urlr(   r   r   )r   s   "r   r   r      s      3 3 r   c                   \        V 4      p\        VP                  ;'       g    VP                  4      pV'       d   V# R V 9  dB   \        RV  24      p\        VP                  ;'       g    VP                  4      pV'       d   V# R# )r4   z//r*   )r	   r/   hostnamer9   )r   r=   r'   
schemelesss   &   r   _extract_host_from_urlishr      sn    c]F6??;;fmm<DC3%j)
z22GGj6G6GHKr   c               $    V ^8  d   QhRRRRRR/# )r   r   r(   rU   rV   r   zOptional[Dict[str, str]]r   )r   s   "r   r   r      s"     2 2c 2 2Jb 2r   c                "   VfF   \         ;_uu_ 4        \        e'   \        P                  R4      '       g    RRR4       R# RRR4       \        V 4      pV'       g   R#  \	        V4      pTP                  R4      '       g   R# TP                  R. 4       F  pTP                  RR4      p\        Y&4      '       g   K(  \        P                  RYTP                  R	R
4      4       RT RTRTR	TP                  R	R
4      RRT RT RTP                  R	R
4       2/u # 	  R#   + '       g   i     L; i  \
         d'   pTe   h \        P                  RT4        Rp?R# Rp?i\         d"   p\        P                  RT4        Rp?R# Rp?ii ; i)u  Check whether a URL is allowed by the website blocklist policy.

Returns ``None`` if access is allowed, or a dict with block metadata
(``host``, ``rule``, ``source``, ``message``) if blocked.

Never raises on policy errors — logs a warning and returns ``None``
(fail-open) so a config typo doesn't break all web tools.  Pass
``config_path`` explicitly (tests) to get strict error propagation.
Nr   z.Website policy config error (failing open): %sz:Unexpected error loading website policy (failing open): %srN   rm   r*   u,   Blocked URL %s — matched rule '%s' from %srn   rg   r   r'   r1   messagezBlocked by website policy: 'z' matched rule 'z' from )rq   r   rd   r   r   r   rF   rG   	Exceptionr   info)r   rU   r'   rh   rM   r1   rm   s   &&     r   check_website_accessr      sd    [).2D2DY2O2O [[ %S)D	'4 ::i  

7B'((9b)#D22KKFdhhx&BD s$((8X624&8H	 R!XXh9:<	 	 ( K [  "GM SUXYs:   #D D  D3  D0	3F>EF,F-F		F)N),__conditional_annotations__r$   
__future__r   r   loggingos	threadingro   pathlibr   typingr   r   r   r   r   urllib.parser	   hermes_constantsr   	getLoggerr    rF   r]   rr   Lockrq   r   __annotations__r   r   r   r   r   r/   r>   rR   ri   r   r   r   r   r   )r   s   @r   <module>r      s    #   	    3 3 ! ,			8	$ urB   nn+/( /%) ] )  U  -: :44#LDN;2 2r   