
    RjX                        d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	m
Z
 ddlmZ ddlmZmZmZ  ej        e          ZdZdZd	Z ej        d
ej                  ZddZddZ G d de          ZdS )u  xAI Web Search — plugin form.

Routes ``web_search`` tool calls through xAI's agentic Web Search tool
(server-side ``web_search`` on the Responses API). Grok runs the actual
searching and page-browsing server-side; we ask it to return the top
results as structured JSON so we can hand back the same
``{title, url, description, position}`` rows every other Hermes web
provider produces.

Reference: https://docs.x.ai/developers/tools/web-search

Config keys this provider responds to::

    web:
      search_backend: "xai"           # explicit per-capability
      backend: "xai"                  # shared fallback

Optional knobs (under ``web.xai`` in ``config.yaml``)::

    web:
      xai:
        model: "grok-4.3"             # reasoning model required by web_search
        allowed_domains: ["x.ai"]     # max 5 — mutually exclusive with excluded_domains
        excluded_domains: ["bad.com"] # max 5 — mutually exclusive with allowed_domains
        timeout: 90                   # seconds (default 90)

Auth: reuses :func:`tools.xai_http.resolve_xai_http_credentials`, which
prefers Hermes-managed xAI Grok OAuth (via ``hermes auth``) and falls back
to ``XAI_API_KEY`` (resolved through ``~/.hermes/.env``, then
``os.environ``).
    )annotationsN)AnyDictListOptional)WebSearchProvider)has_xai_credentialshermes_xai_user_agentresolve_xai_http_credentialszgrok-4.3Z      z\{[\s\S]*\}returnDict[str, Any]c                 n   	 ddl m}   |             }t          |t                    r|                    d          nd}t          |t                    r|                    d          nd}t          |t                    r|ni S # t
          $ r'}t                              d|           i cY d}~S d}~ww xY w)z7Read ``web.xai`` from config.yaml (returns {} on miss).r   )load_configwebNxaiz!Could not load web.xai config: %s)hermes_cli.configr   
isinstancedictget	Exceptionloggerdebug)r   cfgweb_sectionxai_sectionexcs        =/home/ubuntu/.hermes/hermes-agent/plugins/web/xai/provider.py_load_xai_web_configr    @   s    	111111kmm(23(=(=Gcggennn40:;0M0MWkooe,,,SW(d;;C{{C   8#>>>						s   B B 
B4B/)B4/B4valuer   	List[str]c                   t          | t                    sg S g }| D ]l}t          |t                    r;|                                r'|                    |                                           t          |          t          k    r nm|S )z<Coerce a config value to a clean list of <=5 domain strings.)r   liststrstripappendlen_MAX_DOMAIN_FILTERS)r!   cleaneditems      r   _coerce_domain_listr,   N   s    eT"" 	G  dC   	)TZZ\\ 	)NN4::<<(((w<<...E /N    c                      e Zd ZdZedd            Zedd            ZddZddZdd	Z	d d!dZ
ed"d            Zed#d            Zed$d            Zed%d            Zed&d            Zd'dZdS )(XAIWebSearchProvideruv  Search-only provider backed by xAI's agentic Web Search tool.

    Sends a structured prompt to Grok with ``tools=[{"type": "web_search"}]``
    enabled and asks it to return the top *limit* results as JSON. Falls
    back to the Responses API ``citations`` list if Grok ignores the JSON
    schema instruction (rare for grok-4.3 but cheap insurance).

    No extract capability — pair with Firecrawl / Tavily / Exa for
    ``web_extract`` if you need page content.

    Trust model
    -----------
    Unlike index-backed providers (Brave / Tavily / Exa) which return
    verbatim search-engine results, this backend is an LLM in a trench
    coat: Grok decides which URLs to surface, generates the titles and
    descriptions itself, and is influenced by the *content of the query*.
    A maliciously crafted query (e.g. injected via untrusted upstream
    input the agent picked up) can in principle steer Grok into emitting
    attacker-chosen URLs. Callers that pipe untrusted text directly into
    ``web_search`` should treat returned URLs the same way they would
    treat any model-generated link — validate before fetching.
    r   r%   c                    dS )Nr    selfs    r   namezXAIWebSearchProvider.namex   s    ur-   c                    dS )NxAI Web Search (Grok)r1   r2   s    r   display_namez!XAIWebSearchProvider.display_name|   s    &&r-   boolc                    t                      S )u  Cheap availability probe — env var OR auth-store has OAuth tokens.

        Delegates to :func:`tools.xai_http.has_xai_credentials`, which is
        deliberately *not* the same as :func:`resolve_xai_http_credentials`:
        it never triggers OAuth token refresh or acquires the auth-store
        lock. The ABC contract requires this method to be safe to call on
        every ``hermes tools`` repaint and at tool-registration time.
        Token freshness / refresh is handled inside :meth:`search`.
        )r	   r2   s    r   is_availablez!XAIWebSearchProvider.is_available   s     #$$$r-   c                    dS )NTr1   r2   s    r   supports_searchz$XAIWebSearchProvider.supports_search   s    tr-   c                    dS )NFr1   r2   s    r   supports_extractz%XAIWebSearchProvider.supports_extract   s    ur-   r   querylimitintr   c                
   	 ddl m}  |            rdddS n# t          $ r Y nw xY wt                      }t	          |                    d          pd                                          }t	          |                    d          pd	                                                              d
          }|sdddS 	 t          |          }n# t          t          f$ r d}Y nw xY wt          dt          |d                    }t                      }t          |                    d          t                    r|                    d          nt          }|                                pt          }	 t!          |                    dt"                              }	n# t          t          f$ r
 t"          }	Y nw xY wt%          |                    d                    }
t%          |                    d                    }|
r|rdddS ddi}|
rd|
i|d<   n	|rd|i|d<   |                     ||          }|d|dg|gdgd}d| dt)                      d}	 ddl}n# t,          $ r dddcY S w xY wt.                              d ||||           |                    d!          d"k    }d}t3          d#          D ]}	 |                    | d$|||	%          }|                                  n# |j        $ rF}|j        |j        j        nd}|d&k    r|dk    r|rt.                              d'           	 t          d()          }t	          |                    d          pd                                          }|r||k    r|}d| |d*<   Y d}~n2# t          $ r%}t.                              d+|           Y d}~nd}~ww xY wd}	 |j        |j        j         dd,         nd}n# t          $ r d}Y nw xY wt.                              d-||           dd.| d/|                                 dcY d}~c S d}~w|j!        $ r/}t.                              d0|           dd1| dcY d}~c S d}~ww xY w|dd2dS 	 |"                                }n7# t          $ r*}t.                              d3|           dd4dcY d}~S d}~ww xY wt          |tF                    r|                    d5          nd}t          |tF                    rO|                    d6          p|                    d7          pd8}t.                              d9|           dd:| dS | $                    ||;          }|sd(d<g id=S d(d<|id=S )>zExecute a Grok-backed web search.

        Returns ``{"success": True, "data": {"web": [{title, url, description, position}, ...]}}``
        on success, ``{"success": False, "error": str}`` on failure.
        r   )is_interruptedFInterrupted)successerrorapi_key base_urlzhttps://api.x.ai/v1/z_No xAI credentials found. Run `hermes auth` to sign in with xAI Grok OAuth, or set XAI_API_KEY.r      d   modeltimeoutallowed_domainsexcluded_domainszZweb.xai.allowed_domains and web.xai.excluded_domains cannot both be set (xAI restriction).type
web_searchfiltersuser)rolecontentno_inline_citations)rM   inputtoolsincludezBearer zapplication/json)AuthorizationzContent-Typez
User-AgentNz4httpx is not installed (required for xAI web search)z0xAI web search via %s: '%s' (limit=%d, model=%s)providerz	xai-oauth   z
/responses)headersjsonrN   i  zQxAI web search got 401 on first attempt; forcing OAuth refresh and retrying once.T)force_refreshr[   z1xAI web search OAuth refresh after 401 failed: %si,  zxAI web search HTTP %d: %szxAI web search returned HTTP z: z xAI web search request error: %szCould not reach xAI: z#xAI web search produced no responsezxAI web search bad JSON: %sz/Could not parse xAI Responses API reply as JSONrF   messagecodezunknown errorz*xAI web search returned error envelope: %szxAI returned an error: r@   r   )rE   data)%tools.interruptrC   r   r   r%   r   r&   rstriprA   	TypeError
ValueErrormaxminr    r   DEFAULT_MODELfloatDEFAULT_TIMEOUTr,   _build_promptr
   httpxImportErrorr   inforangepostraise_for_statusHTTPStatusErrorresponsestatus_codewarningtextRequestErrorr_   r   _extract_results)r3   r?   r@   rC   credsrG   rI   r   rM   rN   allowedexcludedweb_search_toolpromptpayloadr^   ro   is_oauth_pathrespattemptr   status	refreshedrefreshed_keyrefresh_excbodyrd   	api_errorerr_msgweb_resultss                                 r   searchzXAIWebSearchProvider.search   s   	666666~ B#(=AAAB 	 	 	D	 -..eii	**0b117799uyy,,E0EFFLLNNUUVYZZ 	 :  	JJEE:& 	 	 	EEE	As5#''"$$$.swww/?/?$E$EX   =.	&CGGI??@@GG:& 	& 	& 	&%GGG	& &cgg.?&@&@AA&sww/A'B'BCC 		x 		 !<   ,2<*@ 	H*;W)EOI&& 	H*<h)GOI&##E511 %&99:%& ..#
 #
 1w00./11
 
	LLLL 	 	 	 O    	 	>eUE	
 	
 	
  :..+=Qxx +	R +	RG*Rzz+++# #	 "   %%'''(   58\5M11STS==W\\m\KK5  $@t$T$T$T	(+IMM),D,D,J(K(K(Q(Q(S(S( %]g-E-E&3G7J7J7JGO4$HHHH %   O'       
 69l6N3<,TcT22TVDD    DDD;VTJJJ$MVMMtMMTTVV          % R R RA3GGG#(3P33P3PQQQQQQQQQQR <$/TUUU	99;;DD 	 	 	NN8#>>> J       	 *4D$)?)?IDHHW%%%T	i&& 	Ti(( #==((#" 
 NNGQQQ$/R/R/RSSS++D+>> 	:  $eR[999%)=>>>s    
%%=C C#"C#2(F F65F6I I-,I-<0K//Q7:=P;8ANP;
O O ;P; OP;O)(P;)O85P;7O88;P;3Q7;Q7"Q2*Q72Q7R 
S!S SSc                    d| d|  S )a7  Compose the prompt that asks Grok to act as a search engine.

        We deliberately ask for a JSON object (not bare array) so we can
        match it cheaply with ``_JSON_BLOCK_RE``; we explicitly forbid
        prose, markdown fences, and inline-citation links to keep the
        payload parseable.
        u<  Use the web_search tool to find current information for the query below, then respond with ONLY a single JSON object — no prose, no markdown fences, no inline citation links — matching this exact schema:

{"results": [{"title": "string", "url": "string", "description": "1-2 sentence summary"}]}

Return at most zy results, ordered by relevance, with absolute https:// URLs. If no usable results exist, return {"results": []}.

Query: r1   )r?   r@   s     r   rn   z"XAIWebSearchProvider._build_promptS  s*    
 $   
	
r-   response_dataList[Dict[str, Any]]c               z   |                      |          \  }}|D ]}|                     ||          }|r|c S  |r1d                    |          }|                     |||          }|r|S |                    d          pg }	t          |	t                    r!d t          |	d|                   D             S g S )u<  Pull a ``[{title, url, description, position}, ...]`` list out of a
        Responses-API reply.

        Strategy:

        1. Walk ``output[*].content[*].text`` for ``output_text`` blocks and
           try to parse the first JSON object that has a ``results`` list.
        2. If the JSON path fails, fall back to the message annotations
           (``url_citation`` entries) — every annotation carries a URL and
           a ``title`` (citation number); we pair those URLs with surrounding
           text from the message body as a best-effort description.
        rc   
	citationsc                    g | ]E\  }}t          |t                    |                                .d t          |          d |dz   dFS )rH   rK   titleurldescriptionposition)r   r%   r&   ).0ius      r   
<listcomp>z9XAIWebSearchProvider._extract_results.<locals>.<listcomp>  sm     	 	 	 Aqa%%	 +,''))	q66#% !A	 	 	 	r-   N)_collect_output_text_try_parse_json_resultsjoin_results_from_annotationsr   r   r$   	enumerate)
clsr   r@   text_blocksr   blockparsedjoined_textannotation_resultsr   s
             r   r{   z%XAIWebSearchProvider._extract_resultsh  s   & $'#;#;M#J#J [ ! 	 	E00e0DDF   	*))K00K!$!>!>[ "? " " " *)) "%%k228b	i&& 
		 	 &i&788	 	 	 	 	r-   &tuple[List[str], List[Dict[str, Any]]]c                   g }g }|                      d          }t          |t                    s||fS |D ]8}t          |t                    r|                     d          dk    r2|                     d          }t          |t                    s]|D ]}t          |t                    r|                     d          dk    r1|                     d          }t          |t                    r)|                                r|                    |           |                     d          }t          |t                    r*|D ]'}	t          |	t                    r |j        |	           (ِ:||fS )zEReturn (text_blocks, annotations) extracted from ``response.output``.outputrQ   ra   rV   output_textry   r   )r   r   r$   r   r%   r&   r'   )
r   r   r   r   r+   rV   chunkry   chunk_annotationsanns
             r   r   z)XAIWebSearchProvider._collect_output_text  s   
 "$,.""8,,&$'' 	,++ 	4 	4DdD)) TXXf-=-=-J-Jhhy))Ggt,,   
4 
4!%.. %))F2C2C}2T2Tyy((dC(( -TZZ\\ -&&t,,,$)IIm$<$<!/66 40 4 4%c400 4.K.s333
4 K''r-   ry   Optional[List[Dict[str, Any]]]c                  | g}t                               |           }|rA|                    d          | k    r(|                    |                    d                     |D ]i}	 t	          j        |          }n# t          j        t          f$ r Y 1w xY wt          |t                    sK|
                    d          }t          |t                    svg }|d|         D ]}t          |t                    st          |
                    dd                                                    }	|	sP|                    t          |
                    dd                                                    |	t          |
                    dd                                                    t          |          dz   d	           |r|c S kdS )
aR  Parse a JSON object with a ``results`` array out of ``text``.

        Returns the normalized result list on success, ``None`` when the
        block has no valid JSON object or no ``results`` key. Tolerates
        leading/trailing prose because reasoning models sometimes prefix a
        short narration even when told not to.
        r   resultsNr   rH   r   r   rK   r   )_JSON_BLOCK_REr   groupr'   r_   loadsJSONDecodeErrorrh   r   r   r   r$   r%   r&   r(   )
ry   r@   
candidatesmatch	candidater   r   
normalizedrowr   s
             r   r   z,XAIWebSearchProvider._try_parse_json_results  s    V
%%d++ 	.U[[^^t++ekk!nn---# 	" 	"II..(*5   fd++ jj++Ggt,, /1Jvv  !#t,, #''%,,--3355 !!!$SWWWb%9%9!:!:!@!@!B!B"'*377="+E+E'F'F'L'L'N'N %(
OOa$7 
 
 
 
  "!!!!"ts   'A<<BBr   r   c          	     8   t                      }g }| D ]}|                    d          dk    rt          |                    dd                                                    }|r||v rY|                    |           d}|                    d          }|                    d          }	t          |t                    rt          |	t                    rd|cxk    r|	cxk     rt          |          k    ran n^t          d|dz
            }
||
|                                         }t          |          dk    r|d	d
                                         }|	                    d||t          |          dz   d           t          |          |k    r n|S )a	  Best-effort fallback when JSON parsing fails.

        Uses each ``url_citation`` annotation's ``url`` (the citation
        title is just the integer label, so we don't surface it) and
        slices ~200 characters of surrounding text as the description.
        rQ   url_citationr   rH   start_index	end_indexr      i8NrK   r   )
setr   r%   r&   addr   rA   r(   ri   r'   )r   r   r@   seenr   r   r   r   startendwindow_starts              r   r   z.XAIWebSearchProvider._results_from_annotations  s    (* 	 	Cwwv.00cggeR(())//11C #++HHSMMMKGGM**E''+&&C%%% =*S#*>*> =1CgCgCgCgPSCgCgCgCgWZ[fWgWgCgCgCgCgCg"1eck22),u*<=CCEE{##c))"-dee"4":":"<"<KNN#. #Gq 0	    7||u$$ %r-   c                    dddg ddS )Nr6   paiduU   Agentic web search via Grok's web_search tool — uses xAI Grok OAuth or XAI_API_KEY.xai_grok)r4   badgetagenv_vars
post_setupr1   r2   s    r   get_setup_schemaz%XAIWebSearchProvider.get_setup_schema   s&    
 ,- $	
 	
 		
r-   N)r   r%   )r   r8   )r   )r?   r%   r@   rA   r   r   )r?   r%   r@   rA   r   r%   )r   r   r@   rA   r   r   )r   r   r   r   )ry   r%   r@   rA   r   r   )r   r   r   r%   r@   rA   r   r   r   r   )__name__
__module____qualname____doc__propertyr4   r7   r:   r<   r>   r   staticmethodrn   classmethodr{   r   r   r   r   r1   r-   r   r/   r/   `   sp        .    X ' ' ' X'
% 
% 
% 
%      
{? {? {? {? {?~ 
 
 
 \
( 6 6 6 [6p ( ( ( \(: 0 0 0 \0d ) ) ) \)Z
 
 
 
 
 
r-   r/   r   )r!   r   r   r"   )r   
__future__r   r_   loggingretypingr   r   r   r   agent.web_search_providerr   tools.xai_httpr	   r
   r   	getLoggerr   r   rk   rm   r)   compile	MULTILINEr   r    r,   r/   r1   r-   r   <module>r      sI   @ # " " " " "   				 , , , , , , , , , , , , 7 7 7 7 7 7          
	8	$	$ 
 NBL99   
 
 
 
$M
 M
 M
 M
 M
, M
 M
 M
 M
 M
r-   