
    @j&                        g d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZ ej        e          Zej         G d dej        j                              Zej         G d dej        j                              Zd Z G d d	ej        j                  Z G d
 de          Z G d de          Z G d de          Z G d de          Z G d de          ZdS ))
SubmissionCommentRedditUserScraperRedditSubredditScraperRedditSearchScraperRedditSubmissionScraper    Nc                      e Zd ZU ej        e         ed<   ej        ed<   eed<   ej        e         ed<   ej        e         ed<   ej        e         ed<   eed<   eed<   ej	        
                    d	d
 d          Zd ZdS )r   authordateidlinkselftext	subreddittitleurlcreatedc                     | j         S Nr   selfs    ^/home/ubuntu/.hermes/hermes-agent/venv/lib/python3.11/site-packages/snscrape/modules/reddit.py<lambda>zSubmission.<lambda>        TY     c                     | j         S r   r   r   s    r   __str__zSubmission.__str__"   	    	/r   N__name__
__module____qualname__typingOptionalstr__annotations__datetimesnscrapebase_DeprecatedPropertyr   r    r   r   r   r      s         		s?3OC    	=,,Y8N8NPVWW    r   r   c                       e Zd ZU ej        e         ed<   eed<   ej        ed<   eed<   ej        e         ed<   ej        e         ed<   eed<   ej	        
                    dd	 d          Zd
 ZdS )r   r
   bodyr   r   parentIdr   r   r   c                     | j         S r   r   r   s    r   r   zComment.<lambda>0   r   r   c                     | j         S r   r   r   s    r   r   zComment.__str__2   r   r   Nr    r,   r   r   r   r   &   s         		
?3OC    	=,,Y8N8NPVWW    r   r   c                    |                      d          r]d| v rY|                     dd          \  }} |                     | d          st          d          |                    dd          \  }}|                     t          j        t          j        z             dk    rt          d          |                    t          j        t          j        z             dk    rt          d          t          |           t          |          k     rdS t          |           t          |          k    rdS | |k     rdS | |k    rdS d	S )
zCompare two Reddit IDs. Returns -1 if id1 is less than id2, 0 if they are equal, and 1 if id1 is greater than id2.

	id1 and id2 may have prefixes like t1_, but if included, they must be present on both and equal.t_   z$id2 must have the same prefix as id1 zinvalid characters in id1zinvalid characters in id2r   )
startswithsplit
ValueErrorstripstringascii_lowercasedigitslen)id1id2prefixr4   s       r   _cmp_idrC   6   s0   
 NN3 C3JJ		#q!!+&#	6	%	% <	:	;	;;99S!&!SIIf$v}455;;.///IIf$v}455;;.///HHs3xx	HHs3xx	
#II	#II	
	r   c                   :     e Zd Z fdZd ZddZd ZddZ xZS )_RedditPushshiftScraperc                 p     t                      j        di | ddt          j        j         i| _        d S )Nz
User-Agentz	snscrape/r,   )super__init__r)   version__version___headers)r   kwargs	__class__s     r   rH   z _RedditPushshiftScraper.__init__P   s?    %''V!KX-=-I!K!KL$---r   c                     |j         dk    r0t                              d           t          j        d           dS |j         dk    rdS dS )Ni  zGot 429 response, sleeping
   )Fzrate-limited   )Fznon-200 status code)TN)status_code_loggerinfotimesleep)r   rs     r   _handle_rate_limitingz-_RedditPushshiftScraper._handle_rate_limitingT   sM    ]c
<<,---:b>>>

]c
&
&	r   Nc                     |                      ||| j        | j                  }|j        dk    r't          j                            d|j                   |                                S )N)paramsheadersresponseOkCallbackrP   zGot status code )_getrK   rW   rQ   r)   r*   ScraperExceptionjson)r   r   rY   rV   s       r   _get_apiz _RedditPushshiftScraper._get_api]   s\    
iifTXTnioo!]c		'	'(J1=(J(J	K	KK	
/r   c                 ,   d|v rt           nt          }|                    d          }||                    d          }|d|v rd|d                             d          rId|v r'd|d          d|d         d	d           d
|d          d}n:d|d         d	d           d
|d          d}nt                              d           d}|                    d          t          j                            |d         t          j        j	                  d| |                    d          d}|t           u r|                    d          pd |d<   |d         s.|d                             d          s|d         nd|d          nd |d<   |d         |d         k    s&|d         
                    dd          |d         k    rd |d<   |d         |d<   d|d          |d<   n1|d         |d<   |                    d          |d<   d|d          |d<    |di |S )Nr   	permalinkpermalink_urllink_idt3_r   z/r/z
/comments/   z/_/r   /z%Unable to find or construct permalinkr
   created_utczhttps://old.reddit.com)r
   r   r   r   r   r   r   z//old.reddit.com/z//www.reddit.com/r.   	parent_idr/   t1_r,   )r   r   getr8   rR   warningr(   fromtimestamptimezoneutcreplace)r   dclsra   rL   s        r   _api_obj_to_itemz(_RedditPushshiftScraper._api_obj_to_itemc   sj   ll

# eeK  )uu_%%9A~~!I,11%88~qQ+QQ!I,qrr2BQQqwQQQii>q|ABB/>>AdG>>>ii__<===Y UU8__**1]+;X=N=RSS
.9
.
.k""	 & 	Jj))1T6*ou  wA  pB  LQuX%8%8%=%=fQuXXCf\]^c\dCfCfCf  HL6&>Vnu%%)>)>?RTg)h)hlrsylz)z)zF6NwZ6'?!$//6$<<fI6&>k**6*!$//6$<	vr   c              #   `  K   d|i }	 |                      ||          }|d         r##t          fd|d         D                       rdS |d         D ]=}t          |d                   dk    r|                     |          V  |d         >|d         d         d         d	z   |d
<   )zRIterate through the Pushshift API using the 'until' parameter and yield the items.NT)rY   datac              3   L   K   | ]}t          |d                    dk    V  dS )r   r   N)rC   ).0rp   lowestIdSeens     r   	<genexpr>z4_RedditPushshiftScraper._iter_api.<locals>.<genexpr>   s8      :t:tcd71T7L;Y;Y]^;^:t:t:t:t:t:tr   r   r7   rg   r5   until)r_   allrC   rr   )r   r   rY   objrp   rw   s        @r   	_iter_apiz!_RedditPushshiftScraper._iter_api   s      ,^68	sV	,	,3
f+ 
,2s:t:t:t:thklrhs:t:t:t7t7t2	EK  qwqw==CC  #####dG\[_]3a76'?8r   r   )	r!   r"   r#   rH   rW   r_   rr   r|   __classcell__rM   s   @r   rE   rE   O   s        M M M M M     $ $ $L8 8 8 8 8 8 8 8r   rE   c                   h     e Zd Zddddd fd
ZdefdZd Zed             Zed	             Z	 xZ
S )
_RedditPushshiftSearchScraperTNsubmissionscommentsbeforeafterc                    t                      j        di | || _        || _        || _        || _        || _        t          |                               | j                  s?t          dt          |           j
                            dd          d          d          | j        s| j        st          d          d S d S )Nzinvalid -r5   z namez5At least one of submissions and comments must be Truer,   )rG   rH   _name_submissions	_comments_before_aftertype_validationFuncr:   namer9   )r   r   r   r   r   r   rL   rM   s          r   rH   z&_RedditPushshiftSearchScraper.__init__   s    %''V$*!$$.$,$+	d	#	#DJ	/	/ H	FtDzz44S!<<Q?FFF	G	GG		 M4> M	K	L	LLM M M Mr   rY   c              #     K   d|d<   | j         
| j         |d<   | j        
| j        |d<   | j        r)|                     d|                                          }nt          d          }| j        r)|                     d|                                          }nt          d          }	 t          |          }n# t          $ r |E d {V  Y d S w xY w	 t          |          }n# t          $ r |V  |E d {V  Y d S w xY w	 |j	        |j	        k    r2|V  	 t          |          }nO# t          $ r |V  |E d {V  Y d S w xY w|V  	 t          |          }n# t          $ r |V  |E d {V  Y d S w xY wu)N1000limitry   sincez1https://api.pushshift.io/reddit/search/submissionr,   .https://api.pushshift.io/reddit/search/comment)
r   r   r   r|   copyiterr   nextStopIterationr   )r   rY   submissionsItercommentsItertipSubmission
tipComments         r   "_iter_api_submissions_and_commentsz@_RedditPushshiftSearchScraper._iter_api_submissions_and_comments   s6     &/	\\6'?	[[6'?	 ^^$WY_YdYdYfYfgg??"XX?	^ ..!QSYS^S^S`S`aa<<r((<
((==	 
 
 
	66

\""::	 
 
 
		66	
:?**
/**]]   
UU	 |$$ZZ   
UUsH   )B9 9CCC# #C=<C=D& &E ?E E E21E2c              #   r   K   |                      t          |           j        | j        i          E d {V  d S r   )r   r   	_apiFieldr   r   s    r   	get_itemsz'_RedditPushshiftSearchScraper.get_items   sB      44d4jj6JDJ5WXXXXXXXXXXXr   c                    |                     ddddd           |                     ddddd	           |                     d
dt          d           |                     ddt          d           | j                            dd          d         }|                     |t          j                            |                     d S )Nz--no-submissionsnoSubmissions
store_trueFzDon't list submissions)destactiondefaulthelpz--no-comments
noCommentszDon't list commentsz--before	TIMESTAMPz%Fetch results before a Unix timestamp)metavarr   r   z--afterz$Fetch results after a Unix timestampr   r5   r   )add_argumentintr   r9   r)   utilsnonempty_string_arg)rq   	subparserr   s      r   _cli_setup_parserz/_RedditPushshiftSearchScraper._cli_setup_parser   s    +Olfk  uN  O  O  O`e  oE  F  F  F{3Ovwwwk#Ntuuu	Q			"$hn&H&H&N&NOOOOOr   c                     | j                             dd          d         }|                     |t          ||          |j         |j         |j        |j                  S )Nr   r5   r   )r   r9   _cli_constructgetattrr   r   r   r   )rq   argsr   s      r   _cli_from_argsz,_RedditPushshiftSearchScraper._cli_from_args   su    	Q			"$			D'$"5"5I[E[lpl{h{  GK  GR  \`  \f	  
g  
g  gr   )r!   r"   r#   rH   dictr   r   classmethodr   r   r}   r~   s   @r   r   r      s        +/D4Y] M M M M M M M1d 1 1 1 1fY Y Y P P +P g g +g g g g gr   r   c                       e Zd ZdZd ZdZdS )r   zreddit-userc                 ,    t          j        d|           S )Nz^[A-Za-z0-9_-]{3,20}$rematchxs    r   r   zRedditUserScraper.<lambda>   s    RX&=qAA r   r
   Nr!   r"   r#   r   r   r   r,   r   r   r   r      s         AAr   r   c                       e Zd ZdZd ZdZdS )r   zreddit-subredditc                 ,    t          j        d|           S )Nz^[A-Za-z0-9][A-Za-z0-9_]{2,20}$r   r   s    r   r   zRedditSubredditScraper.<lambda>   s    RX&GKK r   r   Nr   r,   r   r   r   r      s         KKr   r   c                       e Zd ZdZd ZdZdS )r   zreddit-searchc                     dS )NTr,   r   s    r   r   zRedditSearchScraper.<lambda>   s    T r   qNr   r,   r   r   r   r      s        !>r   r   c                   T     e Zd ZdZ fdZd Zed             Zed             Z xZ	S )r   zreddit-submissionc                    |                     d          r
|dd          n|                    t          j        t          j        z             dk    rt          d           t                      j        di | || _        d S )Nrd   re   r6   zinvalid submissionIdr,   )	r8   r;   r<   r=   r>   r:   rG   rH   _submissionId)r   submissionIdrL   rM   s      r   rH   z RedditSubmissionScraper.__init__  s    &11%88Jl122lQQRXRhkqkxRxyy}	*	+	++%''V#$r   c              #     K   |                      d| j                   }|d         sd S t          |d                   dk    r6t          j                            dt          |d                    d          |                     |d         d                   V  |                     dt          | j        d          d	d
          E d {V  d S )Nz6https://api.pushshift.io/reddit/search/submission?ids=rt   r5   zGot z results instead of 1r   r   $   i  )rc   r   )	r_   r   r?   r)   r*   r]   rr   r|   r   )r   r{   s     r   r   z!RedditSubmissionScraper.get_items  s     ctOaccdd#	V 
	6V		'	'(Vs3v;/?/?(V(V(V	W	WWc&k!n----- ^^LZ]^b^prtZuZu  AE  OF  OF  G  G  G  G  G  G  G  G  G  G  Gr   c                 n    |                     dt          j                            d                     d S )Nr   r   )r   r)   r   r   )rq   r   s     r   r   z)RedditSubmissionScraper._cli_setup_parser  s2    0R0RSa0b0bcccccr   c                 8    |                      ||j                  S r   )r   r   )rq   r   s     r   r   z&RedditSubmissionScraper._cli_from_args  s    			D$"3	4	44r   )
r!   r"   r#   r   rH   r   r   r   r   r}   r~   s   @r   r   r      s        $ $ $ $ $	G 	G 	G d d +d 5 5 +5 5 5 5 5r   r   )__all__dataclassesr(   loggingr   snscrape.baser)   snscrape.utilssnscrape.versionr<   rT   r$   	getLoggerr!   rR   	dataclassr*   Itemr   r   rC   ScraperrE   r   r   r   r   r   r,   r   r   <module>r      sL   E  E  E       				                '
H
%
%
     #         hm    
 
 
2G8 G8 G8 G8 G8hm3 G8 G8 G8TPg Pg Pg Pg Pg$; Pg Pg Pgf    5       :       7   5 5 5 5 55 5 5 5 5 5r   