+
    ViS                     f   R t ^ RIt^ RIHtHtHtHt ^RIHtH	t	H
t
HtHtHtHtHtHtHtHtHt ^RIHtHtHtHt ^RIHt R R ltR R	 ltR
 R ltR R ltR R ltR#RR/R R lllt RR/R R llt!R#RR/R R lllt"R R lt#R$RR/R R lllt$R$RR/R R lllt%R R lt&R R  lt'R! R" lt(R# )%z.
Crawling functionality for Firecrawl v2 API.
N)OptionalDictAnyList)CrawlRequestCrawlJobCrawlResponseDocumentCrawlParamsRequestCrawlParamsResponseCrawlParamsDataWebhookConfigCrawlErrorsResponseActiveCrawlsResponseActiveCrawlPaginationConfig)
HttpClienthandle_response_errorvalidate_scrape_optionsprepare_scrape_options)normalize_document_inputc                (    V ^8  d   QhR\         RR/# )   requestreturnN)r   )formats   "Y/home/ubuntu/hermes-agent/venv/lib/python3.14/site-packages/firecrawl/v2/methods/crawl.py__annotate__r      s     8 8\ 8d 8    c                   V P                   '       d!   V P                   P                  4       '       g   \        R4      hV P                  e   V P                  ^ 8:  d   \        R4      hV P                  e   \        V P                  4       R# R# )z
Validate crawl request parameters.

Args:
    request: CrawlRequest to validate
    
Raises:
    ValueError: If request is invalid
URL cannot be emptyNzLimit must be positive)urlstrip
ValueErrorlimitscrape_optionsr   )r   s   &r   _validate_crawl_requestr&      sm     ;;;gkk//11.//}} W]]a%7122 ) 6 67 *r   c                0    V ^8  d   QhR\         R\        /# )r   r   r   )r   dict)r   s   "r   r   r   &   s     G GL GT Gr   c                f   \        V 4       RV P                  /pV P                  '       d   V P                  VR&   V P                  e"   \	        V P                  4      pV'       d   W!R&   V P                  RRR7      pVP                  RR4       VP                  RR4       VP                  RR4       V P                  eP   \        V P                  \        4      '       d   V P                  VR&   MV P                  P                  RR	7      VR&   R
RRRRRRRRRRRRRRRRRRRRRRRR R!R"R#/pVP                  4        F   w  rVWS9   g   K  VP                  V4      W&   K"  	  VP                  V4       R$V9   d8   \        VR$,          \        4      '       d   VR$,          P                  4       VR$&   V# )%z
Prepare crawl request for API submission.

Args:
    request: CrawlRequest to prepare
    
Returns:
    Dictionary ready for API submission
r!   promptNscrapeOptionsT)exclude_noneexclude_unsetr%   webhook)r,   include_pathsincludePathsexclude_pathsexcludePathsmax_discovery_depthmaxDiscoveryDepthsitemapignore_query_parametersignoreQueryParametersdeduplicate_similar_urlsdeduplicateSimilarURLscrawl_entire_domaincrawlEntireDomainallow_external_linksallowExternalLinksallow_subdomainsallowSubdomainsignore_robots_txtignoreRobotsTxtdelaymax_concurrencymaxConcurrencyregex_on_full_urlregexOnFullURLzero_data_retentionzeroDataRetentionintegration)r&   r!   r*   r%   r   
model_dumppopr.   
isinstancestritemsupdater"   )r   datascrape_datarequest_datafield_mappings
snake_case
camel_cases   &      r   _prepare_crawl_requestrV   &   s    G$ 7;;D ~~~ X ),W-C-CD$/! %%4t%LL UD!Xt$%t, "goos++%ooDO &oo88d8KDO 	29!#:"$<2 4-.+-2N$ #1"6"6"8
%+//
;D #9
 	KKD,?!E!E"=1779]Kr   c                r    V ^8  d   QhR\         \        \        ,          ,          R\        \        ,          /# )r   	data_listr   )r   r   r   r	   )r   s   "r   r   r   p   s'      htCy&9 d8n r   c           
          . pT ;'       g    .  F>  p\        V\        4      '       g   K  VP                  \        R/ \	        V4      B 4       K@  	  V# )N )rL   r(   appendr	   r   )rX   	documentsdoc_datas   &  r   _parse_crawl_documentsr^   p   sJ     "IOOOh%%XK(@(JKL $ r   c                t    V ^8  d   QhR\         \        \        3,          R\         \        \        3,          /# )r   response_datar   )r   rM   r   )r   s   "r   r   r   x   s*      S#X 4S> r   c                 r   V P                  R 4      '       g   \        V P                  RR4      4      hRV P                  R4      RV P                  R^ 4      RV P                  R^ 4      RV P                  R^ 4      RV P                  R	4      R
V P                  R
4      R\        V P                  R. 4      4      /# )successerrorUnknown error occurredstatus	completedtotalcredits_usedcreditsUsed
expires_at	expiresAtnextrP   )get	Exceptionr^   )r`   s   &r   _parse_crawl_status_responsero   x   s    Y''))'3KLMM 	-##H-]&&{A6""7A.))-;m''4!!&)&}'8'8'DE r   c                <    V ^8  d   QhR\         R\        R\        /# r   clientr   r   )r   r   r   )r   s   "r   r   r      s&      N  N
  N\  Nm  Nr   c                Z   \        V4      pV P                  RV4      pVP                  '       g   \        VR4       VP	                  4       pVP                  R4      '       d0   RVP                  R4      RVP                  R4      /p\        R/ VB # \        VP                  RR4      4      h)	a  
Start a crawl job for a website.

Args:
    client: HTTP client instance
    request: CrawlRequest containing URL and options
    
Returns:
    CrawlResponse with job information
    
Raises:
    ValueError: If request is invalid
    Exception: If the crawl operation fails to start
z	/v2/crawlzstart crawlrb   idr!   rc   rd   rZ   )rV   postokr   jsonrm   r   rn   )rr   r   rR   responser`   job_datas   &&    r   start_crawlrz      s     *'2L{{;5H;;;h6MMOM##-##D)=$$U+

 (x(())'3KLMMr   request_timeoutc          
          V ^8  d   QhR\         R\        R\        \        ,          R\        \        ,          R\
        /# )r   rr   job_idpagination_configr{   r   )r   rM   r   r   floatr   )r   s   "r   r   r      sD     > >>>   01>
 e_> >r   c          
     2   V P                  RV 2VR7      pVP                  '       g   \        VR4       VP                  4       p\	        V4      pVR,          pV'       d   VP
                  MRpV'       dW   VR,          '       dH   V'       d)   VP                  e   \        V4      VP                  8  g   \        V VR,          VVVR7      p\        VR	,          VR
,          VR,          VR,          VR,          V'       g   VR,          VR7      # RVR7      # )a  
Get the status of a crawl job.

Args:
    client: HTTP client instance
    job_id: ID of the crawl job
    pagination_config: Optional configuration for pagination behavior
    request_timeout: Timeout (in seconds) for each individual HTTP request. When auto-pagination 
        is enabled (default) and there are multiple pages of results, this timeout applies to 
        each page request separately, not to the entire operation

Returns:
    CrawlJob with current status and data

Raises:
    Exception: If the status check fails

/v2/crawl/timeoutzget crawl statusrP   Trl   Nr{   re   rf   rg   rh   rj   re   rf   rg   rh   rj   rl   rP   )
rm   rv   r   rw   ro   auto_paginatemax_resultslen_fetch_all_pagesr   )	rr   r}   r~   r{   rx   r`   payloadr\   r   s	   &&&$     r   get_crawl_statusr      s   2 zzJvh/zIH ;;;h(:; MMOM*=9GI 8I%33dM))5	N/;;;$FO+
	 x +&g^,<($1WV_  8< r   c                ^    V ^8  d   QhR\         R\        R\        \        ,          R\        /# )r   rr   next_urlr{   r   )r   rM   r   r   r   )r   s   "r   r   r      s4     $ $$$ e_	$
 $r   c          
        V P                  WR7      pVP                  '       g   \        VR4       VP                  4       p\	        V4      p\        VR,          VR,          VR,          VR,          VR,          VR,          VR	,          R
7      # )au  
Fetch a single page of crawl results using the provided next URL.

Args:
    client: HTTP client instance
    next_url: Opaque next URL from a prior crawl status response
    request_timeout: Timeout (in seconds) for the HTTP request

Returns:
    CrawlJob with the page data and next URL (if any)

Raises:
    Exception: If the request fails or returns an error response
r   zget crawl status pagere   rf   rg   rh   rj   rl   rP   r   )rm   rv   r   rw   ro   r   )rr   r   r{   rx   r`   r   s   &&$   r   get_crawl_status_pager      s    ( zz(z<H;;;h(?@MMOM*=9Gx +&g^,<(V_V_ r   c                    V ^8  d   QhR\         R\        R\        \        ,          R\        \
        ,          R\        \        ,          R\        \        ,          /# )r   rr   r   initial_documentsr~   r{   r   )r   rM   r   r	   r   r   r   )r   s   "r   r   r     s^     H HHH H~H   01	H e_H 
(^Hr   c               0   VP                  4       pTp^ pV'       d   VP                  MRpV'       d   VP                  MRp	V'       d   VP                  MRp
\        P
                  ! 4       pV'       Ed   Ve
   Wx8  d    V# V
e%   \        P
                  ! 4       V,
          V
8  d    V# V P                  WdR7      pVP                  '       g8   ^ RIpVP                  R4      pVP                  RRVP                  /R7        V# VP                  4       p \        V4      pTR,           F*  pT	e   \        T4      T	8  d    MTP!                  T4       K,  	  T	e   \        T4      T	8  d    T# TR,          pT^,          pEK  V#   \         d     T# i ; i)	an  
Fetch all pages of crawl results.

Args:
    client: HTTP client instance
    next_url: URL for the next page
    initial_documents: Documents from the first page
    pagination_config: Optional configuration for pagination limits
    request_timeout: Optional timeout (in seconds) for the underlying HTTP request

Returns:
    List of all documents from all pages
Nr   	firecrawlzFailed to fetch next pagestatus_code)extrarP   rl   )copy	max_pagesr   max_wait_timetime	monotonicrm   rv   logging	getLoggerwarningr   rw   ro   rn   r   r[   )rr   r   r   r~   r{   r\   current_url
page_countr   r   r   
start_timerx   r   logger	page_datapage_payloaddocuments   &&&&$             r   r   r     s   * "&&(IKJ 0A!++dI3D#//$K7H%33dM!J
+!z'>J G %DNN,<z,I]+ZD ? ::k:C{{{&&{3FNN6}hNbNb>cNd0 - MMO		7	BL
 %V,,H&3y>[+HX&	 - "s9~'D  #6*a
%  	" %	s   F FFc                <    V ^8  d   QhR\         R\        R\        /# )r   rr   r}   r   )r   rM   bool)r   s   "r   r   r   ]  s!     6 6 6S 6T 6r   c                    V P                  RV 24      pVP                  '       g   \        VR4       VP                  4       pVP	                  R4      R8H  # )z
Cancel a running crawl job.

Args:
    client: HTTP client instance
    job_id: ID of the crawl job to cancel
    
Returns:
    bool: True if the crawl was cancelled, False otherwise
    
Raises:
    Exception: If the cancellation fails
r   zcancel crawlre   	cancelled)deleterv   r   rw   rm   )rr   r}   rx   r`   s   &&  r   cancel_crawlr   ]  sM     }}z&23H;;;h7MMOMX&+55r   c                    V ^8  d   QhR\         R\        R\        R\        \        ,          R\        \        ,          R\
        /# )r   rr   r}   poll_intervalr   r{   r   )r   rM   intr   r   r   )r   s   "r   r   r   t  sL     +" +"+"+" +" c]	+" e_+" +"r   c                  \         P                  ! 4       p \        V VVR7      pVP                  R9   d   V# Ve4   \         P                  ! 4       V,
          V8  d   \	        RV RV R24      h\         P
                  ! V4       Kq  )a  
Wait for a crawl job to complete, polling for status updates.

Args:
    client: HTTP client instance
    job_id: ID of the crawl job
    poll_interval: Seconds between status checks
    timeout: Maximum seconds to wait (None for no timeout)
    request_timeout: Optional timeout (in seconds) for each status request
    
Returns:
    CrawlJob when job completes
    
Raises:
    Exception: If the job fails
    TimeoutError: If timeout is reached
r   z
Crawl job z did not complete within z seconds)rf   failedr   )r   r   r   re   TimeoutErrorsleep)rr   r}   r   r   r{   r   	crawl_jobs   &&&&$  r   wait_for_crawl_completionr   t  s    2 !J
$+
	 CC DNN$4z$AW#LF83LWIU]^__ 	

=!r   c                    V ^8  d   QhR\         R\        R\        R\        \        ,          R\        \        ,          R\
        /# )r   rr   r   r   r   r{   r   )r   r   r   r   r   r   )r   s   "r   r   r     sL     ) ))) ) c]	) e_) )r   c               `    \        W4      pVP                  pVe   TMTp\        V VVVVR7      # )a  
Start a crawl job and wait for it to complete.

Args:
    client: HTTP client instance
    request: CrawlRequest containing URL and options
    poll_interval: Seconds between status checks
    timeout: Maximum seconds to wait for the entire crawl job to complete (None for no timeout)
    request_timeout: Timeout (in seconds) for each individual HTTP request, including pagination 
        requests when fetching results. If there are multiple pages, each page request gets this timeout
    
Returns:
    CrawlJob when job completes
    
Raises:
    ValueError: If request is invalid
    Exception: If the crawl fails to start or complete
    TimeoutError: If timeout is reached
r   )rz   rt   r   )rr   r   r   r   r{   r   r}   effective_request_timeouts   &&&&$   r   crawlr     sF    8 F,I\\F 4C3NT[ %1 r   c                <    V ^8  d   QhR\         R\        R\        /# rq   )r   r
   r   )r   s   "r   r   r     s,     sN sN sN6H sN_ sNr   c                R   VP                   '       d!   VP                   P                  4       '       g   \        R4      hVP                  '       d!   VP                  P                  4       '       g   \        R4      hRVP                   RVP                  /pV P	                  RV4      pVP
                  '       g   \        VR4       VP                  4       pVP                  R4      '       Ed   VP                  R/ 4      p/ pR	R
RRRRRRRRRRRRRRRRRRRRRR/pR V9   d3   VR ,          p\        V\        4      '       d   \        R3/ VB VR &   MWR &   VP                  4        F  w  rW9   g   K  V	R8X  d   WY,          e   WY,          p/ pR!R"R#R$R%R&R'R(R)R*R+R,/pVP                  4        F  w  rW9   g   K  W,          W&   K  	  R-V9   d8   VR-,          p\        V\        4      '       d   ^R.IHp V! VR/7      VR-&   MVVR-&   VP                  4        F  w  ppVV9  g   K  VR-8w  g   K  VVV&   K  	  WV
&   K  WY,          Wj&   K  	  VP                  4        F  w  ppVV9  g   K  VVV&   K  	  R0V9   d   VR0,          VR0&   \!        R3/ VB # \#        VP                  R1R24      4      h)4a8  
Get crawl parameters from LLM based on URL and prompt.

Args:
    client: HTTP client instance
    request: CrawlParamsRequest containing URL and prompt
    
Returns:
    CrawlParamsData containing suggested crawl options
    
Raises:
    ValueError: If request is invalid
    Exception: If the operation fails
r    zPrompt cannot be emptyr!   r*   z/v2/crawl/params-previewzcrawl params previewrb   rP   r0   r/   r2   r1   r4   r3   r5   r7   r6   r9   r8   r;   r:   r=   r<   r?   r>   rD   rC   r+   r%   rH   rG   r.   includeTagsinclude_tagsexcludeTagsexclude_tagsonlyMainContentonly_main_contentwaitForwait_forskipTlsVerificationskip_tls_verificationremoveBase64Imagesremove_base64_imagesformats)ScrapeFormats)r   r   rc   rd   rZ   )r!   r"   r#   r*   ru   rv   r   rw   rm   rL   r(   r   rN   listtypesr   r   rn   )rr   r   rR   rx   r`   params_dataconverted_paramsrS   webhook_datarU   rT   scrape_opts_dataconverted_scrape_optsscrape_field_mappingsscrape_camelscrape_snakeformats_datar   keyvalues   &&                  r   crawl_params_previewr     s     ;;;gkk//11.//>>>!5!5!7!7122 	w{{'..L {{5|DH ;;;h(>? MMOM###''3 OO!6y#%>$&@!6 "81/-!6
  #&y1L,--.;.Kl.K +.:+&4&:&:&<"J(0[5L5X'2'>$,.)%~%~)+>!:-/F,.D-) 7L6Q6Q6S2';BRB`1? 7T
 !$44'7	'B%lD99=?LUa?b1)<?K1)< '7&<&<&>
U&;;y@P9>1#6 '? 4IZ03>3J$0I '=N &++-JC.((- % .
 %*7	*BY'2!122))'3KLMMr   c                <    V ^8  d   QhR\         R\        R\        /# )r   http_clientcrawl_idr   )r   rM   r   )r   s   "r   r   r   D  s'     G G* G G@S Gr   c           
     r   V P                  RV R24      pVP                  '       g   \        VR4        VP                  4       pVP                  RV4      pRVP                  R. 4      RVP                  RVP                  R. 4      4      /p\	        R
/ VB #   \
         d   p\        RT 24      hR	p?ii ; i)z
Get errors from a crawl job.

Args:
    http_client: HTTP client for making requests
    crawl_id: The ID of the crawl job
    
Returns:
    CrawlErrorsResponse containing errors and robots blocked URLs
    
Raises:
    Exception: If the request fails
r   z/errorszcheck crawl errorsrP   errorsrobots_blockedrobotsBlockedz'Failed to parse crawl errors response: NrZ   )rm   rv   r   rw   r   rn   )r   r   rx   bodyr   
normalizedes   &&     r   get_crawl_errorsr   D  s     H:W=>H;;;h(<=
G}}((64( gkk(B/gkk/7;;GWY[;\]

 #0Z00 GA!EFFGs   A"B B6#B11B6c                0    V ^8  d   QhR\         R\        /# )r   rr   r   )r   r   )r   s   "r   r   r   d  s      f  fj  f-A  fr   c                n   V P                  R4      pVP                  '       g   \        VR4       VP                  4       pVP                  R4      '       g   \	        VP                  RR4      4      hVP                  R. 4      p. pV F  p\        V\        4      '       g   K  VP                  RVP                  R4      RVP                  R	VP                  R4      4      R
VP                  R
4      RVP                  R4      /4       K  	  \        RV Uu. uF  p\        R/ VB NK  	  upR7      # u upi )z
Get a list of currently active crawl jobs.

Args:
    client: HTTP client instance
    
Returns:
    ActiveCrawlsResponse containing a list of active crawl jobs
    
Raises:
    Exception: If the request fails
z/v2/crawl/activezget active crawlsrb   rc   rd   crawlsrt   team_idteamIdr!   optionsT)rb   r   rZ   )
rm   rv   r   rw   rn   rL   r(   r[   r   r   )rr   rx   r   	crawls_innormalized_crawlscncs   &      r   get_active_crawlsr   d  s    zz,-H;;;h(;<==?D88I*BCDD2&Ia$$aeeDk155155+;<quuU|155+	&    Rc5dRcBk6GB6GRc5dee5ds   D2
)N)r   N))__doc__r   typingr   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   utilsr   r   r   r   utils.normalizer   r&   rV   r^   ro   rz   r   r   r   r   r   r   r   r   r   rZ   r   r   <module>r      s     , ,    g f 68*GT NF>
 (,> >B$ (,	$NH (,H HV6.+" (,+" +"\) (,) )XsNlG@ fr   