
Commit bde0159

Merge pull request #78 from my-dev-app/fix/rotation
2 parents c0a280e + 14e1ea2

File tree: 7 files changed (+114 -92 lines)

README.md

Lines changed: 1 addition & 1 deletion

@@ -50,7 +50,7 @@ targets = [
 # Initialize proxy relay
 proxy_relay = AProxyRelay(
     targets=targets,
-    timeout=5,
+    timeout=30,
     scrape=True,
     filter=True,
     zones=['us'],

aproxyrelay/__init__.py

Lines changed: 11 additions & 5 deletions

@@ -14,18 +14,19 @@
 """
 from asyncio import get_event_loop, gather
 from datetime import datetime, UTC
-from logging import basicConfig, INFO, DEBUG, getLogger
 from typing import Callable
 from queue import Queue
 
+import logging
+
 from .core import AProxyRelayCore
 
 
 class AProxyRelay(AProxyRelayCore):
     def __init__(
         self,
         targets: list[str],
-        timeout: int = 5,
+        timeout: int = 30,
         scrape: bool = True,
         filter: bool = True,
         zones: list[str] = ['US'],  # noqa: B006
@@ -48,7 +49,7 @@ def __init__(
         ```py
         proxy_relay = AProxyRelay(
             targets=targets,
-            timeout=5,
+            timeout=30,
             scrape=True,
             filter=True,
             zones=['US', 'DE'],
@@ -58,8 +59,13 @@ def __init__(
         ```
         """
         # Configure the logger
-        basicConfig(level=INFO if not debug else DEBUG)
-        self.logger = getLogger(__name__)
+        logging.basicConfig(level=logging.INFO if not debug else logging.DEBUG)
+        self.logger = logging.getLogger(__name__)
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.DEBUG if debug else logging.INFO)
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        console_handler.setFormatter(formatter)
+        self.logger.addHandler(console_handler)
 
         # Initialize Core
         AProxyRelayCore.__init__(self)
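
Note on the logging change above: the constructor now goes through the stdlib logging module by name instead of importing individual symbols. A minimal standalone sketch of the same setup, for readers who want to reproduce it outside the class (the wrapper function is hypothetical; the debug flag mirrors the constructor argument):

    import logging

    def configure_logger(debug: bool = False) -> logging.Logger:
        # Root configuration plus an explicit console handler with a
        # timestamped format, matching the constructor above.
        logging.basicConfig(level=logging.DEBUG if debug else logging.INFO)
        logger = logging.getLogger(__name__)
        handler = logging.StreamHandler()
        handler.setLevel(logging.DEBUG if debug else logging.INFO)
        handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logger.addHandler(handler)
        return logger

One caveat: basicConfig already attaches a stream handler to the root logger, and records propagate upward by default, so the extra StreamHandler can print each record twice on the console.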

aproxyrelay/core.py

Lines changed: 1 addition & 2 deletions

@@ -79,8 +79,7 @@ async def get_proxies(self) -> None:
 
         if self.filter and self.scrape:
             self.logger.info(f'[aProxyRelay] Validating: Proxies ({self._queue_filter.qsize()}), checking if proxies meet connection requirements ...')  # noqa: B950
-            async with ClientSession(conn_timeout=15) as session:
-                await self._test_all_proxies(session)
+            await self._test_all_proxies()
             self.logger.info(f'[aProxyRelay] Filter: Found {self._filtered_failed} incompetent and {self._filtered_available} available proxy servers in {datetime.now(UTC) - self.started}')  # noqa: B950
         else:
             while not self._queue_filter.empty():
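
The session removed here used aiohttp's conn_timeout= keyword, which is deprecated in favor of a ClientTimeout object; the replacement code in req.py below follows that newer pattern. A minimal sketch of the equivalent, assuming the same 15-second connect budget (the helper name is illustrative):

    from aiohttp import ClientSession, ClientTimeout

    async def open_session() -> ClientSession:
        # ClientSession(conn_timeout=15) expressed with the current API
        return ClientSession(timeout=ClientTimeout(connect=15))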

aproxyrelay/process.py

Lines changed: 16 additions & 20 deletions

@@ -11,8 +11,6 @@
 Process class, once all proxies have been received, we are going to obtain the data for the targets.
 This class contains the core mechanics for scraping the targets.
 """
-from aiosocks2.connector import ProxyConnector, ProxyClientRequest
-from aiohttp import ClientSession
 from asyncio import gather
 from queue import Queue
 
@@ -32,31 +30,29 @@ async def _process_targets_main(self) -> None:
         """
         self.logger.info('[aProxyRelay] Processing ...')
 
-        async with ClientSession(
-            connector=ProxyConnector(remote_resolve=True),
-            request_class=ProxyClientRequest,
-            conn_timeout=self.timeout
-        ) as session:
-            tasks = []
+        tasks = []
 
-            while not self._queue_target_process.empty():
-                proxy = self.proxies.get()
-                if isinstance(proxy, dict):
-                    proxy = f"{proxy['protocol'].replace('https', 'http')}://{proxy['ip']}:{proxy['port']}"
-                target = self._queue_target_process.get()
+        while not self._queue_target_process.empty():
+            proxy = self.proxies.get()
+            if isinstance(proxy, dict):
+                proxy = f"{proxy['protocol'].replace('https', 'http')}://{proxy['ip']}:{proxy['port']}"
+            target = self._queue_target_process.get()
 
-                # Append the coroutine object to the tasks list
-                tasks.append(self._obtain_targets(proxy, target, session))
-                self.proxies.put(proxy)
+            # Append the coroutine object to the tasks list
+            tasks.append(self._obtain_targets(proxy, target))
 
-            self.proxies = Queue()
-            # Use asyncio.gather to concurrently execute all tasks
-            await gather(*tasks)
+        # Use asyncio.gather to concurrently execute all tasks
+        await gather(*tasks)
 
-        self.logger.info(f'[aProxyRelay] Processing ({self._queue_target_process.qsize()}) items in Queue ... Please wait...')
+        self.logger.info(f'[aProxyRelay] Processing ({self._queue_target_process.qsize()}) items in Queue using ({self.proxies.qsize()}) proxies ... Please wait...')  # noqa: B950
 
+        # Proxy queue is empty but targets are available
         if self.proxies.empty() and self._queue_target_process.qsize() > 0:
+            self.logger.info(
+                f'[aProxyRelay] All Proxies exhausted ({self._queue_target_process.qsize()}) items left in Queue ... Please wait...'
+            )
            await self.get_proxies()
            await self.process_targets()
+        # Proxy queue has proxies, targets are available
         elif not self.proxies.empty() and self._queue_target_process.qsize() > 0:
             await self.process_targets()
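
A small worked example of the proxy normalization kept in the loop above, with made-up values:

    proxy = {'protocol': 'https', 'ip': '127.0.0.1', 'port': '8080'}
    url = f"{proxy['protocol'].replace('https', 'http')}://{proxy['ip']}:{proxy['port']}"
    print(url)  # -> http://127.0.0.1:8080

The https-to-http rewrite is likely needed because aiohttp_socks' ProxyConnector.from_url understands http, socks4, and socks5 schemes but not https.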

aproxyrelay/req.py

Lines changed: 83 additions & 62 deletions

@@ -10,15 +10,15 @@
 
 Class which handles all requests made throughout the library.
 """
-from aiohttp.client_exceptions import ClientHttpProxyError, \
-    ServerDisconnectedError, \
-    ClientProxyConnectionError, \
+from ssl import SSLCertVerificationError, SSLError
+from aiohttp import ClientSession, ClientTimeout
+from aiohttp.client_exceptions import ServerDisconnectedError, \
     ClientResponseError, \
     ClientOSError, \
-    ServerTimeoutError, \
-    InvalidURL
-from aiosocks2.errors import SocksError
-from asyncio import gather, TimeoutError
+    InvalidURL, \
+    ConnectionTimeoutError
+from aiohttp_socks import ProxyConnectionError, ProxyConnector, ProxyError
+from asyncio import IncompleteReadError, gather, TimeoutError
 from json import dumps
 
 from .scrapers import proxy_list
@@ -79,12 +79,9 @@ async def _request_proxy_page(self, url, session) -> None:
         else:
             self.proxies.put(row)
 
-    async def _test_all_proxies(self, session):
+    async def _test_all_proxies(self):
         """
         Use asyncio.gather to run multiple requests concurrently by executing `self._test_proxy_link`.
-
-        Args:
-            session: aiohttp session without proxy support
         """
         # Use asyncio.gather to run multiple tests concurrently
         to_filter = []
@@ -95,10 +92,10 @@ async def _test_all_proxies(self):
 
         # Remove duplicate entries
         to_filter = [dict(x) for x in list(set([tuple(item.items()) for item in to_filter]))]
-        tasks = [self._test_proxy_link(proxy['proxy'], proxy, session) for proxy in to_filter]
+        tasks = [self._test_proxy_link(proxy['proxy'], proxy) for proxy in to_filter]
         await gather(*tasks)
 
-    async def _test_proxy_link(self, proxy_url, data, session) -> None:
+    async def _test_proxy_link(self, proxy_url, data) -> None:
         """
         Asynchronously call gg.my-dev.app, a website built by the creator of this package.
         If the connection was successful, the proxy works!
@@ -109,31 +106,48 @@ async def _test_proxy_link(self, proxy_url, data) -> None:
             proxy_url: The URL of the proxy to be tested.
             data: Additional data for the proxy test.
         """
+        # If port is empty, assume port 80
+        if data['port'] == '':
+            data['port'] = '80'
+        # Make sure the port is in range
+        if int(data['port']) < 0 or int(data['port']) > 65535:
+            return
         try:
-            async with session.post(
-                'https://gg.my-dev.app/api/v1/proxies/validate/lib',
-                proxy=proxy_url,
-                headers={
-                    **self._get_header(),
-                    'Content-Type': 'application/json'
-                },
-                data=dumps(data)
-            ) as response:
-                if response.status == 200:
-                    self.proxies.put(data)
-                    self._filtered_available = self._filtered_available + 1
-                else:
-                    self._filtered_failed = self._filtered_failed + 1
+            self.logger.debug(f'[aProxyRelay] Processing: {proxy_url} -> Added to queue')
+            connector = ProxyConnector.from_url(proxy_url.replace('unknown', 'socks4'))
+            timeout = ClientTimeout(total=self.timeout, connect=self.timeout)
+            async with ClientSession(connector=connector, timeout=timeout) as session:
+                async with session.post(
+                    'https://gg.my-dev.app/api/v1/proxies/validate/lib',
+                    headers={
+                        **self._get_header(),
+                        'Content-Type': 'application/json'
+                    },
+                    data=dumps(data)
+                ) as response:
+                    if response.status == 200:
+                        self.proxies.put(data)
+                        self._filtered_available = self._filtered_available + 1
+                        self.logger.debug(f'[aProxyRelay] Succeed: {proxy_url} -> Freshly Discovered')
+                    else:
+                        self._filtered_failed = self._filtered_failed + 1
+                        self.logger.debug(f'[aProxyRelay] Succeed: {proxy_url} -> Address Known')
         except (
-            ClientHttpProxyError,
-            ServerDisconnectedError,
-            ClientProxyConnectionError,
-            ClientResponseError,
             ClientOSError,
-            ServerTimeoutError,
             InvalidURL,
             ConnectionResetError,
-        ):
+            ProxyError,
+            SSLCertVerificationError,
+            ProxyConnectionError,
+            ConnectionTimeoutError,
+            IncompleteReadError,
+            UnicodeEncodeError,
+            SSLError,
+            ConnectionAbortedError,
+            ServerDisconnectedError,
+            ClientResponseError,
+            TimeoutError
+        ) as e:
+            self.logger.debug(f'[aProxyRelay] Failed: {proxy_url} -> {repr(e)}')
             self._filtered_failed = self._filtered_failed + 1
 
     async def _fetch_proxy_servers(self, urls, session):
@@ -172,7 +186,7 @@ async def _request_proxy_servers(self, url, session) -> None:
             self.proxies.put(row)
             self._filtered_ggs = self._filtered_ggs + 1
 
-    async def _obtain_targets(self, proxy_url, target, session) -> None:
+    async def _obtain_targets(self, proxy_url, target) -> None:
         """
         Asynchronously fetch the targets with our proxies.
         The 'steam' variable should be defaulted to False and should only be used when targeting Steam.
@@ -182,37 +196,44 @@ async def _obtain_targets(self, proxy_url, target) -> None:
             proxy_url: The URL of the proxy to be used for the request.
         """
         try:
-            async with session.get(
-                target,
-                proxy=proxy_url,
-                headers={
-                    **self._get_header(),
-                    'Content-Type': 'application/json'
-                },
-            ) as response:
-                status = response.status
-                if status in (200, 202,):
-                    self.proxies.put(proxy_url)
-                    data = await response.json()
-                    if data:
-                        if pack := self.unpack(data, target):
-                            self._queue_result.put(pack)
-                        else:
-                            self.logger.warning(f'[aProxyRelay] Could not unpack data for: {target}')
-                    else:
-                        self.logger.warning(f'[aProxyRelay] Target {target} Data seems to be None: {data}')
-                else:
-                    self._queue_target_process.put(target)
-
+            connector = ProxyConnector.from_url(proxy_url.replace('unknown', 'socks4'))
+            timeout = ClientTimeout(total=self.timeout, connect=self.timeout)
+            async with ClientSession(connector=connector, timeout=timeout) as session:
+                async with session.get(
+                    target,
+                    headers={
+                        **self._get_header(),
+                        'Content-Type': 'application/json'
+                    },
+                ) as response:
+                    status = response.status
+                    if status in (200, 202,):
+                        self.proxies.put(proxy_url)
+                        data = await response.json()
+                        if data:
+                            if pack := self.unpack(data, target):
+                                self._queue_result.put(pack)
+                            else:
+                                self.logger.warning(f'[aProxyRelay] Could not unpack data for: {target}')
+                        else:
+                            self.logger.warning(f'[aProxyRelay] Target {target} Data seems to be None: {data}')
+                    else:
+                        self._queue_target_process.put(target)
         except (
-            ClientHttpProxyError,
-            ServerDisconnectedError,
-            ClientProxyConnectionError,
-            ClientResponseError,
             ClientOSError,
-            ServerTimeoutError,
             InvalidURL,
-            SocksError,
-            TimeoutError,
-        ):
+            ConnectionResetError,
+            ProxyError,
+            SSLCertVerificationError,
+            ProxyConnectionError,
+            ConnectionTimeoutError,
+            IncompleteReadError,
+            UnicodeEncodeError,
+            SSLError,
+            ConnectionAbortedError,
+            ServerDisconnectedError,
+            ClientResponseError,
+            TimeoutError
+        ) as e:
+            self.logger.debug(f'[aProxyRelay] Failed: {target} -> {repr(e)}')
             self._queue_target_process.put(target)
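
The recurring shape in both rewritten methods is one short-lived ClientSession per proxied request, with aiohttp_socks inferring the proxy type from the URL scheme. A minimal sketch of that pattern in isolation (the helper name and URLs are illustrative only):

    from asyncio import run

    from aiohttp import ClientSession, ClientTimeout
    from aiohttp_socks import ProxyConnector

    async def fetch_status(target: str, proxy_url: str, timeout_s: int = 30) -> int:
        # from_url parses http://, socks4:// or socks5:// from the scheme;
        # a connector is bound to one session, hence session-per-request.
        connector = ProxyConnector.from_url(proxy_url)
        timeout = ClientTimeout(total=timeout_s, connect=timeout_s)
        async with ClientSession(connector=connector, timeout=timeout) as session:
            async with session.get(target) as response:
                return response.status

    # e.g. run(fetch_status('https://example.com', 'socks5://127.0.0.1:1080'))

The proxy_url.replace('unknown', 'socks4') calls in the diff pre-clean scraped entries whose protocol could not be detected, defaulting them to SOCKS4 before from_url parses the scheme.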

example.py

Lines changed: 1 addition & 1 deletion

@@ -9,7 +9,7 @@
 # Initialize proxy relay
 proxy_relay = AProxyRelay(
     targets=targets,
-    timeout=5,
+    timeout=30,
     scrape=True,
     filter=True,
     zones=['us'],

setup.py

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@
     packages=find_packages(),
     install_requires=[
         'aiohttp',
-        'aiosocks2',
+        'aiohttp_socks',
         'beautifulsoup4',
     ],
     extras_require={
