+
    Vi@                        R t ^ RIt^ RIHtHtHtHtHtHt ^RI	H
t
HtHtHtHtHtHt ^RIHtHtHtHt ^RIHt ^RI	Ht R R ltR	 R
 ltRRRRRRRRRRRRRRRR/R R lltR.R R lltRR/R R lltR.R R lltR R ltR/R R llt RRRRRRRRRRRRRRRRR ^R!R/
R" R# llt!R$ R% lt"RRRRRRRRRRRRRR/R& R' llt#R0R( R) llt$R1R* R+ llt%R, R- lt&R# )2z4
Batch scraping functionality for Firecrawl v2 API.
N)OptionalListCallableDictAnyUnion)BatchScrapeRequestBatchScrapeResponseBatchScrapeJobScrapeOptionsDocumentWebhookConfigPaginationConfig)
HttpClienthandle_response_errorvalidate_scrape_optionsprepare_scrape_options)normalize_document_input)CrawlErrorsResponsec                r    V ^8  d   QhR\         \        \        ,          ,          R\        \        ,          /# )   	data_listreturn)r   r   r   r   )formats   "Y/home/ubuntu/hermes-agent/venv/lib/python3.14/site-packages/firecrawl/v2/methods/batch.py__annotate__r      s'      Xd3i-@ T(^     c                     . pT ;'       g    .  F@  p\        V\        4      '       g   K  \        V4      pVP                  \	        R/ VB 4       KB  	  V# )N )
isinstancedictr   appendr   )r   	documentsdoc
normalizeds   &   r   _parse_batch_scrape_documentsr%      sN     "IBc4  1#6JX3
34  r   c                t    V ^8  d   QhR\         \        \        3,          R\         \        \        3,          /# )r   bodyr   )r   strr   )r   s   "r   r   r      s*      d38n c3h r   c                    V P                  R 4      '       g   \        V P                  RR4      4      hRV P                  R4      RV P                  R^ 4      RV P                  R^ 4      RV P                  R4      RV P                  R	4      R
V P                  R
4      R\        V P                  R. 4      ;'       g    . 4      /# )successerrorUnknown error occurredstatus	completedtotalcredits_usedcreditsUsed
expires_at	expiresAtnextdata)get	Exceptionr%   )r'   s   &r   #_parse_batch_scrape_status_responser8      s    88I*BCDD 	$((8$TXXk1-'1%/dhh{+ -dhhvr.B.H.HbI r   optionswebhookappend_to_idignore_invalid_urlsmax_concurrencyzero_data_retentionintegrationidempotency_keyc                   V ^8  d   QhR\         R\        \        ,          R\        \        ,          R\        \
        \        \        3,          ,          R\        \        ,          R\        \        ,          R\        \        ,          R\        \        ,          R	\        \        ,          R
\        \        ,          R\        /# )r   clienturlsr9   r:   r;   r<   r=   r>   r?   r@   r   )
r   r   r(   r   r   r   r   boolintr	   )r   s   "r   r   r   -   s     7 77
s)7 m$	7
 eC./07 3-7 "$7 c]7 "$7 #7 c]7 7r   c                  \        VVVVVVVVR7      p
V P                  V	4      pV P                  RWR7      pVP                  '       g   \	        VR4       VP                  4       pVP                  R4      '       g   \        VP                  RR4      4      h\        VP                  R4      VP                  R	4      VP                  R
4      ;'       g    RR7      # )a#  
Start a batch scrape job for multiple URLs.

Args:
    client: HTTP client instance
    urls: List of URLs to scrape
    options: Scraping options
    
Returns:
    BatchScrapeResponse containing job information
    
Raises:
    FirecrawlError: If the batch scrape operation fails to start
)r9   r:   r;   r<   r=   r>   r?   z/v2/batch/scrape)headerszstart batch scraper*   r+   r,   idurlinvalidURLsN)rH   rI   invalid_urls)	prepare_batch_scrape_request_prepare_headerspostokr   jsonr6   r7   r	   )rB   rC   r9   r:   r;   r<   r=   r>   r?   r@   request_datarG   responser'   s   &&$$$$$$$$    r   start_batch_scraperS   -   s    8 0!/'/	L %%o6G{{-|{MH ;;;h(<= ==?D88I*BCDD88D>HHUOXXm,44 r   c                ^    V ^8  d   QhR\         R\        R\        \        ,          R\        /# )r   rB   job_idpagination_configr   )r   r(   r   r   r
   )r   s   "r   r   r   g   s6     1 111   011 	1r   c           
        V P                  RV 24      pVP                  '       g   \        VR4       VP                  4       p\	        V4      pVR,          pV'       d   VP
                  MRpV'       d%   VR,          '       d   \        V VR,          VV4      p\        VR,          VR,          VR,          VR	,          VR
,          V'       g   VR,          VR7      # RVR7      # )a7  
Get the status of a batch scrape job.

Args:
    client: HTTP client instance
    job_id: ID of the batch scrape job
    pagination_config: Optional configuration for pagination behavior
    
Returns:
    BatchScrapeJob containing job status and data
    
Raises:
    FirecrawlError: If the status check fails
/v2/batch/scrape/zget batch scrape statusr5   Tr4   r-   r.   r/   r0   r2   Nr-   r.   r/   r0   r2   r4   r5   )r6   rO   r   rP   r8   auto_paginate_fetch_all_batch_pagesr
   )rB   rU   rV   rR   r'   payloadr"   rZ   s   &&&     r   get_batch_scrape_statusr]   g   s    ( zz-fX67H ;;;h(AB ==?D1$7GI 8I%33dM*FO	
	 x +&g^,<($1WV_  8< r   request_timeoutc                ^    V ^8  d   QhR\         R\        R\        \        ,          R\        /# )r   rB   next_urlr^   r   )r   r(   r   floatr
   )r   s   "r   r   r      s4     $ $$$ e_	$
 $r   c          
        V P                  WR7      pVP                  '       g   \        VR4       VP                  4       p\	        V4      p\        VR,          VR,          VR,          VR,          VR,          VR,          VR	,          R
7      # )a  
Fetch a single page of batch scrape results using the provided next URL.

Args:
    client: HTTP client instance
    next_url: Opaque next URL from a prior batch scrape status response
    request_timeout: Timeout (in seconds) for the HTTP request

Returns:
    BatchScrapeJob with the page data and next URL (if any)

Raises:
    Exception: If the request fails or returns an error response
)timeoutzget batch scrape status pager-   r.   r/   r0   r2   r4   r5   rY   )r6   rO   r   rP   r8   r
   )rB   r`   r^   rR   r'   r\   s   &&$   r   get_batch_scrape_status_pagerd      s    ( zz(z<H;;;h(FG==?D1$7Gx +&g^,<(V_V_ r   c                    V ^8  d   QhR\         R\        R\        \        ,          R\        \
        ,          R\        \        ,          /# )r   rB   r`   initial_documentsrV   r   )r   r(   r   r   r   r   )r   s   "r   r   r      sO     D DDD H~D   01	D
 
(^Dr   c                .   VP                  4       pTp^ pV'       d   VP                  MRpV'       d   VP                  MRpV'       d   VP                  MRp	\        P
                  ! 4       p
V'       Ed   Ve
   Wg8  d    V# V	e%   \        P
                  ! 4       V
,
          V	8  d    V# V P                  V4      pVP                  '       g8   ^ RIpVP                  R4      pVP                  RRVP                  /R7        V# VP                  4       p \        V4      pTR,           F*  pTe   \        T4      T8  d    MTP!                  T4       K,  	  Te   \        T4      T8  d    T# TR,          pT^,          pEK  V#   \         d     T# i ; i)a&  
Fetch all pages of batch scrape results.

Args:
    client: HTTP client instance
    next_url: URL for the next page
    initial_documents: Documents from the first page
    pagination_config: Optional configuration for pagination limits
    
Returns:
    List of all documents from all pages
N	firecrawlzFailed to fetch next pagestatus_code)extrar5   r4   )copy	max_pagesmax_resultsmax_wait_timetime	monotonicr6   rO   logging	getLoggerwarningri   rP   r8   r7   lenr!   )rB   r`   rf   rV   r"   current_url
page_countrl   rm   rn   
start_timerR   rq   logger	page_datapage_payloaddocuments   &&&&             r   r[   r[      s   $ "&&(IKJ 0A!++dI3D#//$K7H%33dM!J
+!z'>H E %DNN,<z,I]+ZB = ::k*{{{&&{3FNN6}hNbNb>cNd. + MMO		>yIL
 %V,,H&3y>[+HX&	 - "s9~'D  #6*a
%  	" %	s   F FFc                <    V ^8  d   QhR\         R\        R\        /# r   rB   rU   r   )r   r(   rD   )r   s   "r   r   r   	  s&     - --- 
-r   c                    V P                  RV 24      pVP                  '       g   \        VR4       VP                  4       pVP	                  R4      R8H  # )z
Cancel a running batch scrape job.

Args:
    client: HTTP client instance
    job_id: ID of the batch scrape job to cancel
    
Returns:
    BatchScrapeStatusResponse with updated status
    
Raises:
    FirecrawlError: If the cancellation fails
rX   zcancel batch scraper-   	cancelled)deleterO   r   rP   r6   )rB   rU   rR   r'   s   &&  r   cancel_batch_scraper   	  sQ    $ }}09:H ;;;h(=> ==?D88H,,r   c          
      j    V ^8  d   QhR\         R\        R\        R\        \        ,          R\        /# )r   rB   rU   poll_intervalrc   r   )r   r(   rE   r   r
   )r   s   "r   r   r   &  s>     $" $"$"$" $" c]	$"
 $"r   c                   \         P                  ! 4       p \        W4      pVP                  R9   d   V# V'       d4   \         P                  ! 4       V,
          V8  d   \	        RV RV R24      h\         P
                  ! V4       Kr  )a  
Wait for a batch scrape job to complete, polling for status updates.

Args:
    client: HTTP client instance
    job_id: ID of the batch scrape job
    poll_interval: Seconds between status checks
    timeout: Maximum seconds to wait (None for no timeout)
    
Returns:
    BatchScrapeStatusResponse when job completes
    
Raises:
    FirecrawlError: If the job fails or timeout is reached
    TimeoutError: If timeout is reached
zBatch scrape job z did not complete within z seconds)r.   failedr   )ro   rp   r]   r-   TimeoutErrorsleep)rB   rU   r   rc   rw   
status_jobs   &&&&  r   wait_for_batch_completionr   &  sy    , !J
,V<
  DD (:5@!26(:ST[S\\deff 	

=!r   r   rc   c                   V ^8  d   QhR\         R\        \        ,          R\        \        ,          R\        \
        \        \        3,          ,          R\        \        ,          R\        \        ,          R\        \        ,          R\        \        ,          R	\        \        ,          R
\        \        ,          R\        R\        \        ,          R\        /# )r   rB   rC   r9   r:   r;   r<   r=   r>   r?   r@   r   rc   r   )
r   r   r(   r   r   r   r   rD   rE   r
   )r   s   "r   r   r   M  s     3 33
s)3 m$	3
 eC./03 3-3 "$3 c]3 "$3 #3 c]3 3 c]3 3r   c       
        \    \        V VVVVVVVVV	R7
      pVP                  p\        WW4      # )a  
Start a batch scrape job and wait for it to complete.

Args:
    client: HTTP client instance
    urls: List of URLs to scrape
    options: Scraping options
    poll_interval: Seconds between status checks
    timeout: Maximum seconds to wait (None for no timeout)
    
Returns:
    BatchScrapeStatusResponse when job completes
    
Raises:
    FirecrawlError: If the batch scrape fails to start or complete
    TimeoutError: If timeout is reached
)r9   r:   r;   r<   r=   r>   r?   r@   )rS   rH   r   )rB   rC   r9   r:   r;   r<   r=   r>   r?   r@   r   rc   startrU   s   &&$$$$$$$$$$  r   batch_scraper   M  sL    B !/'/'E XXF % r   c                \    V ^8  d   QhR\         \        ,          R\         \        ,          /# )r   rC   r   )r   r(   )r   s   "r   r   r     s"      d3i DI r   c                P   V '       g   \        R4      h. pV  F  pV'       d   \        V\        4      '       g   \        RV 24      hVP                  R4      '       g&   VP                  R4      '       g   \        RV 24      hVP	                  VP                  4       4       K  	  V# )z
Validate and normalize a list of URLs for batch scraping.

Args:
    urls: List of URLs to validate
    
Returns:
    Validated list of URLs
    
Raises:
    ValueError: If URLs are invalid
zURLs list cannot be emptyzInvalid URL: zhttp://zhttps://z)URL must start with http:// or https://: )
ValueErrorr   r(   
startswithr!   strip)rC   validated_urlsrI   s   &  r   validate_batch_urlsr     s     455N*S#..}SE233 y))S^^J-G-GHNOOciik*  r   c                V   V ^8  d   QhR\         \        ,          R\        \        ,          R\        \        \        \
        3,          ,          R\        \        ,          R\        \        ,          R\        \        ,          R\        \        ,          R\        \        ,          R	\        /	# )
r   rC   r9   r:   r;   r<   r=   r>   r?   r   )	r   r(   r   r   r   r   rD   rE   r    )r   s   "r   r   r     s     / /
s)/ m$/ eC./0	/
 3-/ "$/ c]/ "$/ #/ 
/r   c               d   \        V 4      pRV/p	V'       d%   \        V4      p
V
'       d   V	P                  V
4       Ve1   \        V\        4      '       d   W)R&   MVP                  RR7      V	R&   Ve   W9R&   Ve   WIR&   Ve   WYR&   Ve   WiR&   Ve   \	        V4      P                  4       V	R	&   V	# )
z
Prepare a batch scrape request payload.

Args:
    urls: List of URLs to scrape
    options: Scraping options
    
Returns:
    Request payload dictionary
rC   r:   T)exclude_none
appendToIdignoreInvalidURLsmaxConcurrencyzeroDataRetentionr?   )r   r   updater   r(   
model_dumpr   )rC   r9   r:   r;   r<   r=   r>   r?   r   rQ   scrape_datas   &$$$$$$$   r   rL   rL     s    * ).N$*N#;L ,W5, gs##&-#&-&8&8d&8&KL#%1\"&,?()")8%&&,?()&)+&6&<&<&>]#r   c                ~    V ^8  d   QhR\         \        ,          R\        R\         \         \        ,          ,          /# )r   rC   
chunk_sizer   )r   r(   rE   )r   s   "r   r   r     s-      T#Y C $tCy/ r   c                v    . p\        ^ \        V 4      V4       F  pVP                  WW1,            4       K  	  V# )z
Split a large list of URLs into smaller chunks for batch processing.

Args:
    urls: List of URLs to chunk
    chunk_size: Maximum size of each chunk
    
Returns:
    List of URL chunks
)rangert   r!   )rC   r   chunksis   &&  r   
chunk_urlsr     s8     F1c$i,dQ^,- -Mr   c                    V ^8  d   QhR\         R\        \        ,          R\        \        ,          R\
        R\
        R\        \
        ,          R\        \        ,          /# )r   rB   rC   r9   r   r   rc   r   )r   r   r(   r   r   rE   r   )r   s   "r   r   r     s_     - --
s)- m$- 	-
 - c]- 
(^-r   c           	         \        W4      p. p^ pV FI  p	\        V V	VVVR7      p
V
P                  '       d   VP                  V
P                  4       V^,          pKK  	  V# )a  
Process a large batch of URLs by splitting into smaller chunks.

Args:
    client: HTTP client instance
    urls: List of URLs to scrape
    options: Scraping options
    chunk_size: Size of each batch chunk
    poll_interval: Seconds between status checks
    timeout: Maximum seconds to wait per chunk
    
Returns:
    List of all scraped documents
    
Raises:
    FirecrawlError: If any chunk fails
)r9   r   rc   )r   r   r5   extend)rB   rC   r9   r   r   rc   
url_chunksall_documentscompleted_chunkschunkresults   &&&&&&     r   process_large_batchr     sl    2 D-JM'
 ;;;  -A   r   c                <    V ^8  d   QhR\         R\        R\        /# r}   )r   r(   r   )r   s   "r   r   r     s"     - -J - -@S -r   c           
     .   V P                  RV R24      pVP                  '       g   \        VR4       VP                  4       pVP                  RV4      pRVP                  R. 4      RVP                  RVP                  R. 4      4      /p\	        R/ VB # )	z
Get errors for a batch scrape job.

Args:
    client: HTTP client instance
    job_id: ID of the batch scrape job

Returns:
    CrawlErrorsResponse with errors and robots-blocked URLs
rX   z/errorszget batch scrape errorsr5   errorsrobots_blockedrobotsBlockedr   )r6   rO   r   rP   r   )rB   rU   rR   r'   r\   r$   s   &&    r   get_batch_scrape_errorsr     s     zz-fXW=>H;;;h(AB==?Dhhvt$G'++h+'++ow{{CSUW7XYJ ,,,r   )N)r   N)d   )Nr   r   N)'__doc__ro   typingr   r   r   r   r   r   typesr   r	   r
   r   r   r   r   utilsr   r   r   r   utils.normalizer   r   r%   r8   rS   r]   rd   r[   r   r   r   r   rL   r   r   r   r   r   r   <module>r      s    = =   g f 6 '7 (,	7
 487 #'7 +/7 &*7 +/7 "&7 &*7t1h$ (,	$NDN-:$"N3 (,	3
 483 #'3 +/3 &*3 +/3 "&3 &*3 3 "3l</ (,/ 48	/
 #'/ +// &*/ +// "&/d"-`-r   