
    $*jh                    <   d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	m
Z
mZ ddlZddlmZ  ej        e          ZdZdZdZd	Ze d
e ZdZdZd1dZd2dZd3dZd3dZd2dZeZeZd1dZ 	 d4d5dZ!d6dZ"d7d!Z#d8d#Z$d9d&Z%d:d*Z&d;d.Z' G d/ d0e          Z(dS )<u  Parallel.ai web search + content extraction — plugin form.

Subclasses :class:`agent.web_search_provider.WebSearchProvider`.

Search runs on one of two transports, picked by credential:

- **No key →** the free hosted Search MCP at ``https://search.parallel.ai/mcp``
  (anonymous Streamable-HTTP JSON-RPC). This makes ``web_search`` work out of
  the box with zero setup, which is why ``parallel`` is the keyless default
  backend in :func:`tools.web_tools._get_backend`.
- **``PARALLEL_API_KEY`` →** the ``parallel`` SDK's v1 ``search`` / ``extract``
  REST endpoints (objective-tuned, mode-selectable, higher rate limits).

Extract mirrors search: keyed uses the async SDK (``AsyncParallel``) v1
``extract``; keyless uses the free MCP's ``web_fetch``. :meth:`extract` is
declared ``async def`` and the dispatcher in
:func:`tools.web_tools.web_extract_tool` detects coroutines via
:func:`inspect.iscoroutinefunction` and awaits.

Config keys this provider responds to::

    web:
      search_backend: "parallel"      # explicit per-capability
      extract_backend: "parallel"     # explicit per-capability
      backend: "parallel"             # shared fallback
      # Optional: search mode (default "advanced"; also "basic")
      # via the PARALLEL_SEARCH_MODE env var. REST path only.

Env vars::

    PARALLEL_API_KEY=...             # https://parallel.ai (optional — unlocks
                                     # the v1 REST Search API; without it,
                                     # search and extract use the free MCP)
    PARALLEL_SEARCH_MODE=advanced    # optional: basic|advanced (legacy
                                     # fast/one-shot map to basic, agentic to
                                     # advanced). REST path only.
    )annotationsN)AnyDictList)WebSearchProviderzhttps://search.parallel.ai/mcpz
2025-06-18zmcp-web-clientz1.0.0/g      >@zISearch powered by the free Parallel Web Search MCP (https://parallel.ai).returnstrc                 F    t            dt          j                    j         S )uS  Mint a fresh Parallel ``session_id`` for a single tool call.

    Per-call rather than process-global: one process serves many unrelated
    chats in the gateway/batch runners, and a shared id would pool their
    searches into one Parallel session. The prefix is deliberately generic
    (no hermes attribution — telemetry policy).
    -)_MCP_CLIENT_NAMEuuiduuid4hex     B/home/ubuntu/.hermes/hermes-agent/plugins/web/parallel/provider.py_new_session_idr   L   s!     33!1333r   Nonec                     	 ddl m}   | dd           dS # t          $ r Y dS t          $ r!}t          t	          |                    d}~ww xY w)aM  Trigger lazy install of the parallel SDK if it isn't present.

    Mirrors the lazy-deps pattern used by the legacy implementation.
    Swallows benign ImportError from the lazy_deps helper itself; if the
    SDK is genuinely missing the subsequent ``from parallel import ...``
    raises ImportError that the caller can handle.
    r   )ensurezsearch.parallelF)promptN)tools.lazy_depsr   ImportError	Exceptionr
   )_lazy_ensureexcs     r   _ensure_parallel_sdk_installedr   ]   s    $::::::&u555555    $ $ $#c((###$s    
A	AA		Ar   c                     ddl m}  t          | dd          }||S t          j        d          }|st          d          t                       ddlm}  ||          }|| _	        |S )zLazy-load + cache the sync Parallel client.

    Cache lives on :mod:`tools.web_tools` (as ``_parallel_client``) so unit
    tests that reset that name between cases keep working.
    r   N_parallel_clientPARALLEL_API_KEYVPARALLEL_API_KEY environment variable not set. Get your API key at https://parallel.ai)Parallelapi_key)
tools.web_tools	web_toolsgetattrosgetenv
ValueErrorr   parallelr#   r    )_wtcachedr%   r#   clients        r   _get_sync_clientr0   o   s     "!!!!!S,d33Fi*++G 
6
 
 	

 #$$$!!!!!!Xg&&&F!CMr   c                     ddl m}  t          | dd          }||S t          j        d          }|st          d          t                       ddlm}  ||          }|| _	        |S )z}Lazy-load + cache the async Parallel client.

    Cache lives on :mod:`tools.web_tools` (as ``_async_parallel_client``).
    r   N_async_parallel_clientr!   r"   )AsyncParallelr$   )
r&   r'   r(   r)   r*   r+   r   r,   r3   r2   )r-   r.   r%   r3   r/   s        r   _get_async_clientr4      s    
 "!!!!!S2D99Fi*++G 
6
 
 	

 #$$$&&&&&&]7+++F!'CMr   c                 .    ddl m}  d| _        d| _        dS )zDrop both cached clients so tests can re-instantiate cleanly.

    Clears the canonical slots on :mod:`tools.web_tools` (where
    :func:`_get_sync_client` / :func:`_get_async_client` read/write them).
    r   N)r&   r'   r    r2   )r-   s    r   _reset_clients_for_testsr6      s,     "!!!!!C!%Cr   c                     t          j        dd                                                                          } | dk    s| dv rdS dS )u  Return the validated v1 search mode (default "advanced").

    V1 collapses the three Beta modes into two. We accept the v1 values
    directly and map the legacy Beta values for back-compat with anyone who
    still sets ``PARALLEL_SEARCH_MODE=fast|one-shot|agentic``:

    - ``fast`` / ``one-shot`` → ``basic``  (lower latency)
    - ``agentic``             → ``advanced`` (higher quality, the v1 default)
    PARALLEL_SEARCH_MODEadvancedbasic>   one-shotfast)r)   r*   lowerstrip)modes    r   _resolve_search_moder@      sL     9+Z88>>@@FFHHDw$"666w:r   
session_id
str | Noner%   protocol_versionDict[str, str]c                L    ddt           d}| r| |d<   |r||d<   |rd| |d<   |S )u  Headers for an MCP request.

    A Bearer token is attached only when we actually hold a key — the free
    endpoint is anonymous, and sending an empty/garbage token would make it
    401 instead of serving the anonymous tier. After ``initialize`` the
    Streamable-HTTP spec expects the negotiated ``MCP-Protocol-Version`` on
    every follow-up request, so we echo it once known.
    zapplication/jsonz#application/json, text/event-stream)zContent-TypeAcceptz
User-AgentzMcp-Session-IdzMCP-Protocol-VersionzBearer Authorization)_MCP_USER_AGENT)rA   r%   rC   headerss       r   _mcp_headersrJ      s`     +7% G
  /$. ! ;*:&' 7#6W#6#6 Nr   textc              #    K   d }| pd                                 }|sdS |                    d          s|                    d          r?	 t          j        |          }n# t          j        $ r Y dS w xY w ||          E d{V  dS g fd}|                    d          D ]}|                    d          }|                    d	          r=                    |t          d	          d         	                                           i|                                 dk    r | |                      E d{V  g  | |                      E d{V  dS )
at  Yield JSON-RPC message dicts from a plain-JSON or SSE response body.

    Handles ``application/json`` (a single object) and ``text/event-stream``
    (SSE: events separated by blank lines; an event's one-or-more ``data:``
    lines concatenate into a single JSON payload). Unparseable chunks and
    non-``data`` SSE fields (``event:``/``id:``/comments) are skipped.
    c              3  X   K   t          | t                    r
| E d {V  d S | | V  d S d S N)
isinstancelist)payloads    r   _emitz!_iter_mcp_messages.<locals>._emit   sS       gt$$ 	 MMMMM ! r    N{[c                      sd S 	 t          j        d                                         S # t           j        $ r Y d S w xY w)N
)jsonloadsjoinJSONDecodeError)
data_liness   r   _flushz"_iter_mcp_messages.<locals>._flush
  sV     	4	:dii
33444# 	 	 	44	s   &. A ArW   zdata:)
r>   
startswithrX   rY   r[   splitrstripappendlenlstrip)rK   rR   bodyparsedr]   rawliner\   s          @r   _iter_mcp_messagesri      s        JBD s ts33 	Z%%FF# 	 	 	FF	5==       J     zz$  zz$??7## 	d3w<<==188::;;;;ZZ\\RuVVXX&&&&&&&JuVVXXs   A! !A43A4
request_idDict[str, Any]c                    i }t          |           D ]?}t          |t                    rd|v sd|v s |                    d          |k    r|c S |}@|S )av  Select the JSON-RPC response for *request_id* from an MCP response body.

    Streamable-HTTP servers may emit progress/log notifications before the
    final result, so we scan the whole stream and return the result/error
    message whose ``id`` matches our request. Falls back to the last
    result/error-bearing message if no id matches; ``{}`` if none is present.
    resulterrorid)ri   rO   dictget)rK   rj   fallbackmsgs       r   _mcp_response_envelopert     so      "H!$''  #t$$ 	X__3774==J&&JJJOr   envelopec                   d| v r-t          dt          | d                   dd                    |                     d          pi }|                    d          r't          dt          |          dd                    |                    d          }t          |t                    r|S |                    d	g           pg D ]}t          |t                    rl|                    d
          dk    rSt          |                    d          pd          }|sW	 t          j        |          c S # t
          j        $ r Y w xY wt          dt          |          dd                    )a  Extract the tool result payload from a ``tools/call`` envelope.

    Prefers ``structuredContent`` (authoritative machine-readable form);
    otherwise scans text blocks for the first JSON-parseable one. Raises on a
    JSON-RPC error or a tool-level ``isError``.
    rn   zParallel MCP error: Ni  rm   isErrorzParallel MCP tool error: structuredContentcontenttyperK   rS   z,Parallel MCP returned no parseable content: )RuntimeErrorr
   rq   rO   rp   rX   rY   r[   )ru   rm   
structuredblockrK   s        r   _mcp_payloadr~   .  s    (P#hw6G2H2H#2NPPQQQ\\(##)rFzz) LJs6{{4C47HJJKKK/00J*d## Ir**0b  eT"" 	uyy'8'8F'B'Buyy((.B//D z$''''''   
Js6{{4C47HJJ  s   !D77E	E		tool_name	argumentsc                   t          j        t                    5 }t          t	          j                              }|                    t          t          d|          d|dt          i t          t          ddd          }|                                 |j                            d	          }t          |j        |          }|                    d
          pi                     d          pt          }|                    t          t          |||          ddd           t          t	          j                              }	|                    t          t          |||          d|	d| |dd          }
|
                                 t#          t          |
j        |	                    cddd           S # 1 swxY w Y   dS )u<  Run the MCP handshake then a single ``tools/call`` and return its payload.

    initialize → (capture ``Mcp-Session-Id``) → notifications/initialized →
    tools/call ``tool_name``. Returns the parsed tool payload dict (see
    :func:`_mcp_payload`). A Bearer token is attached only when *api_key* is set.
    )timeoutNz2.0
initialize)nameversion)protocolVersioncapabilities
clientInfo)jsonrpcro   methodparams)rI   rX   zmcp-session-idrm   r   znotifications/initialized)r   r   z
tools/call)r   r   )httpxClient_MCP_TIMEOUT_SECONDSr
   r   r   post_MCP_SEARCH_URLrJ   _MCP_PROTOCOL_VERSIONr   _MCP_CLIENT_VERSIONraise_for_statusrI   rq   rt   rK   r~   )r   r   r%   r/   init_idinitmcp_session_idinit_envnegotiated_versioncall_idcalls              r   	_mcp_callr   M  s0    
2	3	3	3 8Hvdjll##{{ w// &'<$& 0#6# # 	   
 
" 	 ))*:;;)$)W== \\(##)r../@AA %$ 	 	 :LMM".IJJ 	 	
 	
 	
 djll##{{ :LMM &#,9EE	   	
 	
 	249gFFGGq8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8H 8Hs   FF;;F?F?querylimitintc                   t          d| | gt                      d|          }g }t          |                    d          pg dt	          |d                             D ]\  }}t          |t                    s|                    d          pg }|                    |                    d          pd|                    d	          pd|rd
                    |          nd|dz   d           dd|idt          dS )aN  Run a ``web_search`` tool call against the hosted Search MCP.

    Returns the standard provider search shape
    (``{"success": True, "data": {"web": [...]}}``). The MCP serves a fixed
    result count, so ``limit`` is applied client-side. The MCP requires
    ``objective`` (REST treats it as optional), so we mirror the query.
    
web_search)	objectivesearch_queriesrA   resultsN   excerptsurlrS   title r   r   descriptionpositionTwebr,   )successdataproviderattribution)
r   r   	enumeraterq   maxrO   rp   rb   rZ   _FREE_MCP_ATTRIBUTION)r   r   r%   rQ   web_resultsirm   r   s           r   _mcp_web_searchr     s6    $g)++	
 	

 	 G )+KI 6 6 <"oE1oNOO 
 
	6&$'' 	::j))/Rzz%((.BG,,25=Esxx1112E	 	
 	
 	
 	
 $,	  r   urls	List[str]List[Dict[str, Any]]c           
        t          dt          |           dt                      d|          }i }|                    d          pg D ]H}t	          |t
                    r1|                    d          r|                    |d         |           Ig }| D ]}|                    |          }||                    |dddd	|id
           7|                    d          pd}|                    d          p)d                    |                    d          pg           }|                    ||||||dd           |S )u  Run a ``web_fetch`` tool call against the hosted Search MCP.

    Returns the per-URL extract shape that
    :func:`tools.web_tools.web_extract_tool` expects — exactly one row per input
    URL, in request order (including duplicates). We pass ``full_content=True``
    so the page body comes back as markdown (matching the keyed SDK path and
    what extract callers/summarizers expect), falling back to excerpts only when
    full content is absent. Any input the MCP didn't return is emitted as a
    per-URL error row.
    	web_fetchT)r   full_contentrA   r   r   NrS   z'extraction failed (no content returned)	sourceURLr   r   ry   rn   metadatar   r   

r   r   r   r   r   ry   raw_contentr   )	r   rP   r   rq   rO   rp   
setdefaultrb   rZ   )	r   r%   rQ   by_urlitemr   r   r   ry   s	            r   _mcp_web_fetchr     s    dTARARSS G )+FI&&," 1 1dD!! 	1dhhuoo 	1d5k4000$&G 
 
zz#<NN!F!,c 2    !!'R ((>**Ufkk$((::N:N:TRT.U.U"&*->> 	
 	
 	
 	
 Nr   c                  t    e Zd ZdZedd            Zedd            ZddZddZdd	Z	dddZ
ddZddZdS )ParallelWebSearchProviderz,Parallel.ai search + async extract provider.r	   r
   c                    dS )Nr,   r   selfs    r   r   zParallelWebSearchProvider.name      zr   c                    dS )Nr#   r   r   s    r   display_namez&ParallelWebSearchProvider.display_name  r   r   boolc                j    t          t          j        dd                                                    S )a  Return True when ``PARALLEL_API_KEY`` is set.

        Deliberately key-based: this gates the registry's active-provider walk
        and the ``hermes tools`` picker (auto-selecting Parallel for a user who
        hasn't named it), so it must not claim availability on the keyless path.
        The keyless free-MCP path is reached independently via
        :func:`tools.web_tools._get_backend`'s ``parallel`` terminal default.
        r!   rS   )r   r)   r*   r>   r   s    r   is_availablez&ParallelWebSearchProvider.is_available  s+     BI0"55;;==>>>r   c                    dS NTr   r   s    r   supports_searchz)ParallelWebSearchProvider.supports_search      tr   c                    dS r   r   r   s    r   supports_extractz*ParallelWebSearchProvider.supports_extract  r   r      r   r   r   rk   c                   	 ddl m}  |            rdddS t          j        dd                                          }|s.t
                              d||           t          ||d	
          S t                      }t
                              d|||           t                      
                    |g||t                      dt          t          |d          d          i          }g }t          |j        pg d	t          |d                             D ]P\  }}	|	j        pg }
|                    |	j        pd|	j        pd|
rd                    |
          nd|dz   d           Qdd|idS # t*          $ r}dt-          |          dcY d	}~S d	}~wt.          $ r}dd| dcY d	}~S d	}~wt0          $ r-}t
                              d|           dd| dcY d	}~S d	}~ww xY w)a  Execute a Parallel search (sync).

        With ``PARALLEL_API_KEY`` set, uses the v1 ``search`` REST endpoint with
        the configured mode (``PARALLEL_SEARCH_MODE`` env var, default
        "advanced"; limit requested via advanced_settings.max_results, capped at
        20). Without a key, falls back to the free hosted Search MCP so search
        still works with zero setup.
        r   is_interruptedFInterrupted)r   rn   r!   rS   z+Parallel search (free MCP): '%s' (limit=%d)Nr$   z3Parallel search (v1 REST): '%s' (mode=%s, limit=%d)max_resultsr      )r   r   r?   rA   advanced_settingsr   r   Tr   )r   r   Parallel SDK not installed: zParallel search error: %szParallel search failed: )tools.interruptr   r)   r*   r>   loggerinfor   r@   r0   searchr   minr   r   r   r   rb   r   r   rZ   r+   r
   r   r   warning)r   r   r   r   r%   r?   responser   r   rm   r   r   s               r   r   z ParallelWebSearchProvider.search  s   5	Q666666~ B#(=AAAi 2B77==??G CA5%   'ueTBBBB'))DKKEtU   ())00 %w*,,#0#c%mmR2H2H"I 1  H K&(8(>B#eQ--'PQQ 	 		6!?0b""%z/R!'!3=E'Msxx'9'9'92$%E	      $e[-ABBB 	9 	9 	9$s3xx88888888 	 	 	 ===         	Q 	Q 	QNN6<<<$/O#/O/OPPPPPPPP	QsN   E0 AE0 .DE0 0
G%:FG%G%F+%G%+G%8"G G% G%r   r   kwargsr   r   c           
       K   	 ddl m}  |            rd |D             S t          j        dd                                          }|sVt
                              dt          |                     t          j	        t          t          |          d           d{V S t
                              dt          |                     t                                          |d	d
it                                 d{V }g }|j        pg D ]Y}|j        pd}|sd                    |j        pg           }|j        pd}	|j        pd}
|                    |	|
|||	|
dd           Z|j        pg D ]g}t/          |dd          pd}t/          |dd          p#t/          |dd          pt/          |dd          pd}|                    |dd|d|id           h|S # t0          $ rfd|D             cY dS dwt2          $ rfd|D             cY dS dwt4          $ r3t
                              d           fd|D             cY dS dww xY w)uQ  Extract content from one or more URLs.

        With ``PARALLEL_API_KEY`` set, uses the async SDK's v1 ``extract`` for
        full page content. Without a key, falls back to the free hosted Search
        MCP's ``web_fetch`` tool so extraction works with zero setup, mirroring
        the keyless search path.

        Returns the legacy list-of-results shape that
        :func:`tools.web_tools.web_extract_tool` expects: one entry per
        successful URL plus one entry per failed URL with an ``error``
        field. Errors are not raised — they're returned as per-URL items.
        r   r   c                    g | ]}|d dd	S )r   rS   )r   rn   r   r   ).0us     r   
<listcomp>z5ParallelWebSearchProvider.extract.<locals>.<listcomp>a  s/       HIACC  r   r!   rS   z0Parallel extract (free MCP web_fetch): %d URL(s)Nz%Parallel extract (v1 REST): %d URL(s)r   T)r   r   rA   r   r   r   r   messagery   
error_typezextraction failedr   r   c                8    g | ]}|d d t                    dS )rS   r   r   ry   rn   )r
   r   r   r   s     r   r   z5ParallelWebSearchProvider.extract.<locals>.<listcomp>  s,    \\\RSArCHHMM\\\r   c                $    g | ]}|d d d dS )rS   r   r   r   r   s     r   r   z5ParallelWebSearchProvider.extract.<locals>.<listcomp>  s?        B2@d_b@d@dee  r   zParallel extract error: %sc                $    g | ]}|d d d dS )rS   zParallel extract failed: r   r   r   s     r   r   z5ParallelWebSearchProvider.extract.<locals>.<listcomp>  s?        B2@a\_@a@abb  r   )r   r   r)   r*   r>   r   r   rc   asyncio	to_threadr   rP   r4   extractr   r   r   rZ   r   r   r   rb   errorsr(   r+   r   r   r   )r   r   r   r   r%   r   r   rm   ry   r   r   rn   err_urlerr_msgr   s                 @r   r   z!ParallelWebSearchProvider.extractN  s     I	666666~  MQ    i 2B77==??G QFD		   %.~tDzz4PPPPPPPPPKK?TKKK /0088#14"8*,, 9        H -/G"*0b   -3 A$kk&/*?R@@Gj&B*"!&#*'.25$F$F     ".B  !%339rE9d33 +ui66+ulD99+ +	  &!##%!(%0'$:     N 	] 	] 	]\\\\W[\\\\\\\\\ 	 	 	             	 	 	NN7===           	sN   G A>G  D7G 
I"G5/I5IHII"(I
IIc                    dddddddgdS )	Nr#   freeu   Free web search + extraction via Parallel's hosted Search MCP — no key needed. Add PARALLEL_API_KEY for the v1 REST Search API (richer modes, higher limits).r!   u>   Parallel API key (optional — unlocks the v1 REST Search API)zhttps://parallel.ai)keyr   r   )r   badgetagenv_varsr   r   s    r   get_setup_schemaz*ParallelWebSearchProvider.get_setup_schema  s6    5 .^0 
 
 	
r   Nr	   r
   )r	   r   )r   )r   r
   r   r   r	   rk   )r   r   r   r   r	   r   )r	   rk   )__name__
__module____qualname____doc__propertyr   r   r   r   r   r   r   r  r   r   r   r   r     s        66   X    X	? 	? 	? 	?      >Q >Q >Q >Q >Q@X X X Xt
 
 
 
 
 
r   r   r  )r	   r   )r	   r   rN   )rA   rB   r%   rB   rC   rB   r	   rD   )rK   r
   )rK   r
   rj   r
   r	   rk   )ru   rk   r	   rk   )r   r
   r   rk   r%   rB   r	   rk   )r   r
   r   r   r%   rB   r	   rk   )r   r   r%   rB   r	   r   ))r  
__future__r   r   rX   loggingr)   r   typingr   r   r   r   agent.web_search_providerr   	getLoggerr  r   r   r   r   r   rH   r   r   r   r   r0   r4   r6   _get_parallel_client_get_async_parallel_clientr@   rJ   ri   rt   r~   r   r   r   r   r   r   r   <module>r     sC  $ $L # " " " " "    				  " " " " " " " " " "  7 7 7 7 7 7		8	$	$ 3$  $  %==(;==  P 
4 4 4 4"$ $ $ $$   6   4	& 	& 	& 	& ( .    > $(    6, , , ,^   $   >AH AH AH AHH' ' ' 'T4 4 4 4nF
 F
 F
 F
 F
 1 F
 F
 F
 F
 F
r   