@@ -62,15 +62,22 @@ def __del__(self):
6262 def get_session (self ):
6363 return self ._session
6464
65- def Luminati (self , usr , passwd , proxy_port ):
65+ def Luminati (self , usr , passwd , proxy_port , skip_checking_proxy = False ):
6666 """ Sets up a Luminati proxy without refreshing capabilities.
6767
68+ Note: ``skip_checking_proxy`` is meant to be set to `True` only in
69+ unit tests. Applications using this library must always use the default
70+ value of `False`.
71+
6872 :param usr: scholarly username, optional by default None
6973 :type usr: string
7074 :param passwd: scholarly password, optional by default None
7175 :type passwd: string
7276 :param proxy_port: port for the proxy, optional by default None
7377 :type proxy_port: integer
78+ :param skip_checking_proxy: skip checking if the proxy works,
79+ optional by default False
80+ :type skip_checking_proxy: bool
7481 :returns: whether or not the proxy was set up successfully
7582 :rtype: {bool}
7683
@@ -83,28 +90,36 @@ def Luminati(self, usr , passwd, proxy_port):
8390 password = passwd
8491 port = proxy_port
8592 else :
86- self .logger .info ("Not enough parameters were provided for the Luminati proxy. Reverting to a local connection." )
93+ self .logger .warning ("Not enough parameters were provided for the Luminati proxy. Reverting to a local connection." )
8794 return
8895 session_id = random .random ()
8996 proxy = f"http://{ username } -session-{ session_id } :{ password } @zproxy.lum-superproxy.io:{ port } "
90- proxy_works = self ._use_proxy (http = proxy , https = proxy )
97+ proxy_works = self ._use_proxy (http = proxy , https = proxy , skip_checking_proxy = skip_checking_proxy )
9198 return proxy_works
9299
93- def SingleProxy (self , http = None , https = None ):
100+ def SingleProxy (self , http = None , https = None , skip_checking_proxy = False ):
94101 """
95102 Use proxy of your choice
103+
104+ Note: ``skip_checking_proxy`` is meant to be set to `True` only in
105+ unit tests. Applications using this library must always use the default
106+ value of `False`.
107+
96108 :param http: http proxy address
97109 :type http: string
98110 :param https: https proxy address
99111 :type https: string
112+ :param skip_checking_proxy: skip checking if the proxy works,
113+ optional by default False
114+ :type skip_checking_proxy: bool
100115 :returns: whether or not the proxy was set up successfully
101116 :rtype: {bool}
102117
103118 :Example::
104119 pg = ProxyGenerator()
105120 success = pg.SingleProxy(http = <http proxy address>, https = <https proxy address>)
106121 """
107- proxy_works = self ._use_proxy (http = http ,https = https )
122+ proxy_works = self ._use_proxy (http = http , https = https , skip_checking_proxy = skip_checking_proxy )
108123 return proxy_works
109124
110125 def _check_proxy (self , proxies ) -> bool :
@@ -152,25 +167,33 @@ def _refresh_tor_id(self, tor_control_port: int, password: str) -> bool:
152167 self .logger .info (err )
153168 return (False , None )
154169
155- def _use_proxy (self , http : str , https : str = None ) -> bool :
170+ def _use_proxy (self , http : str , https : str = None , skip_checking_proxy : bool = False ) -> bool :
156171 """Allows user to set their own proxy for the connection session.
157172 Sets the proxy, and checks if it works.
158173
159174 :param http: the http proxy
160175 :type http: str
161176 :param https: the https proxy (default to the same as http)
162177 :type https: str
178+ :param skip_checking_proxy: Skip checking if the proxy works (defaults to False)
179+ :type skip_checking_proxy: bool
163180 :returns: whether or not the proxy was set up successfully
164181 :rtype: {bool}
165182 """
166183 if https is None :
167184 https = http
168185
169186 proxies = {'http' : http , 'https' : https }
170- self ._proxy_works = self ._check_proxy (proxies )
187+ if skip_checking_proxy :
188+ self ._proxy_works = True
189+ else :
190+ self ._proxy_works = self ._check_proxy (proxies )
171191 # check if the proxy url contains luminati or scraperapi
172- has_luminati = (True if "lum" in http else False )
173- has_scraperapi = (True if "scraperapi" in http else False )
192+ if http is not None :
193+ has_luminati = (True if "lum" in http else False )
194+ has_scraperapi = (True if "scraperapi" in http else False )
195+ else :
196+ has_luminati , has_scraperapi = False , False
174197 if self ._proxy_works :
175198 if has_luminati :
176199 self .logger .info ("Enabling Luminati proxy" )
@@ -412,30 +435,42 @@ def _close_session(self):
412435 if self ._webdriver :
413436 self ._webdriver .quit ()
414437
415- def FreeProxies (self ):
438+ def FreeProxies (self , timeout = 1 ):
416439 """
417440 Sets up a proxy from the free-proxy library
418441
442+ :param timeout: Timeout for the proxy in seconds, optional
443+ :type timeout: float
419444 :returns: whether or not the proxy was set up successfully
420445 :rtype: {bool}
421446
422447 :Example::
423448 pg = ProxyGenerator()
424449 success = pg.FreeProxies()
425450 """
426- while True :
427- proxy = FreeProxy (rand = True , timeout = 1 ).get ()
451+ freeproxy = FreeProxy (rand = True , timeout = timeout )
452+ # Looping it 60000 times gives us an 85% chance that we try each proxy
453+ # at least once.
454+ for _ in range (60000 ):
455+ proxy = freeproxy .get ()
428456 proxy_works = self ._use_proxy (http = proxy , https = proxy )
429457 if proxy_works :
430458 return proxy_works
431459
432- def ScraperAPI (self , API_KEY , country_code = None , premium = False , render = False ):
460+ self .logger .info ("None of the free proxies are working at the moment. "
461+ "Try again after a few minutes." )
462+
463+ def ScraperAPI (self , API_KEY , country_code = None , premium = False , render = False , skip_checking_proxy = False ):
433464 """
434465 Sets up a proxy using ScraperAPI
435466
436467 The optional parameters are only for Business and Enterprise plans with
437468 ScraperAPI. For more details, https://www.scraperapi.com/documentation/
438469
470+ Note: ``skip_checking_proxy`` is meant to be set to `True` only in
471+ unit tests. Applications using this library must always use the default
472+ value of `False`.
473+
439474 :Example::
440475 pg = ProxyGenerator()
441476 success = pg.ScraperAPI(API_KEY)
@@ -445,6 +480,9 @@ def ScraperAPI(self, API_KEY, country_code=None, premium=False, render=False):
445480 :type country_code: string, optional by default None
446481 :type premium: bool, optional by default False
447482 :type render: bool, optional by default False
483+ :param skip_checking_proxy: skip checking if the proxy works,
484+ optional by default False
485+ :type skip_checking_proxy: bool
448486 :returns: whether or not the proxy was set up successfully
449487 :rtype: {bool}
450488 """
@@ -460,9 +498,6 @@ def ScraperAPI(self, API_KEY, country_code=None, premium=False, render=False):
460498 r ["requestLimit" ] = int (r ["requestLimit" ])
461499 self .logger .info ("Successful ScraperAPI requests %d / %d" ,
462500 r ["requestCount" ], r ["requestLimit" ])
463- if r ["requestCount" ] == r ["requestLimit" ]:
464- self .logger .warning ("ScraperAPI account limit reached." )
465- return False
466501
467502 # ScraperAPI documentation recommends setting the timeout to 60 seconds
468503 # so it has had a chance to try out all the retries.
@@ -478,12 +513,17 @@ def ScraperAPI(self, API_KEY, country_code=None, premium=False, render=False):
478513 prefix += ".render=true"
479514
480515 for _ in range (3 ):
481- proxy_works = self ._use_proxy (http = f'{ prefix } :{ API_KEY } @proxy-server.scraperapi.com:8001' )
516+ proxy_works = self ._use_proxy (http = f'{ prefix } :{ API_KEY } @proxy-server.scraperapi.com:8001' ,
517+ skip_checking_proxy = skip_checking_proxy )
482518 if proxy_works :
483519 return proxy_works
484520
485- self .logger .warning ("ScraperAPI does not seem to work" )
486- return proxy_works
521+ if (r ["requestCount" ] >= r ["requestLimit" ]):
522+ self .logger .warning ("ScraperAPI account limit reached." )
523+ else :
524+ self .logger .warning ("ScraperAPI does not seem to work. Reason unknown." )
525+
526+ return False
487527
488528 def has_proxy (self )-> bool :
489529 return self ._proxy_gen or self ._can_refresh_tor
0 commit comments