Skip to content

Commit bc0f472

Browse files
authored
Merge pull request #49 from gene1wood/pagination
Add support for GitHub pagination and ratelimiting
2 parents dc50e1c + 3dc6dd4 commit bc0f472

File tree

1 file changed

+91
-4
lines changed

1 file changed

+91
-4
lines changed

agithub/GitHub.py

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
# Copyright 2012-2016 Jonathan Paugh and contributors
22
# See COPYING for license details
33
import base64
4+
import time
5+
import re
46

5-
from agithub.base import API, ConnectionProperties, Client
7+
from agithub.base import API, ConnectionProperties, Client, RequestBody, ResponseBody
68

79

810
class GitHub(API):
911
"""
1012
The agnostic GitHub API. It doesn't know, and you don't care.
11-
>>> from agithub import GitHub
13+
>>> from agithub.GitHub import GitHub
1214
>>> g = GitHub('user', 'pass')
1315
>>> status, data = g.issues.get(filter='subscribed')
1416
>>> data
@@ -33,7 +35,7 @@ class GitHub(API):
3335
it automatically supports the full API--so why should you care?
3436
"""
3537
def __init__(self, username=None, password=None, token=None,
36-
*args, **kwargs):
38+
paginate=False, *args, **kwargs):
3739
extraHeaders = {'accept': 'application/vnd.github.v3+json'}
3840
auth = self.generateAuthHeader(username, password, token)
3941
if auth is not None:
@@ -44,7 +46,7 @@ def __init__(self, username=None, password=None, token=None,
4446
extra_headers=extraHeaders
4547
)
4648

47-
self.setClient(Client(*args, **kwargs))
49+
self.setClient(GitHubClient(paginate=paginate, *args, **kwargs))
4850
self.setConnectionProperties(props)
4951

5052
def generateAuthHeader(self, username=None, password=None, token=None):
@@ -66,3 +68,88 @@ def generateAuthHeader(self, username=None, password=None, token=None):
6668
def hash_pass(self, password):
    """Return the HTTP Basic authorization value for *password*.

    Joins ``self.username`` and *password* with a colon, UTF-8 encodes
    the pair, base64-encodes it, and prefixes the result with the
    UTF-8 encoded string ``'Basic '``.
    """
    credentials = '%s:%s' % (self.username, password)
    encoded = base64.b64encode(credentials.encode('utf-8')).strip()
    scheme = 'Basic '.encode('utf-8')
    return scheme + encoded
71+
72+
class GitHubClient(Client):
    """Client that understands GitHub rate limiting and pagination.

    Before a request, and again after any 403 response, the client checks
    the rate-limit headers of the most recent response and sleeps until
    the advertised reset time when no requests remain. When ``paginate``
    is true and a response body is a list, the RFC 5988 ``Link`` header is
    followed and the items of the following pages are appended to the
    returned list.
    """

    def __init__(self, username=None, password=None, token=None,
                 connection_properties=None, paginate=False):
        """Create the client.

        ``paginate`` controls whether list responses are extended with
        the contents of the following pages.
        """
        # NOTE(review): username, password, token and connection_properties
        # are accepted here but are not forwarded to Client.__init__ --
        # confirm whether that is intentional.
        super(GitHubClient, self).__init__()
        self.paginate = paginate

    def request(self, method, url, bodyData, headers):
        """Low-level networking. All HTTP-method methods call this.

        Returns a ``(status, data)`` tuple, where ``data`` is the processed
        response body. The headers of the last response are kept in
        ``self.headers`` for the rate-limit and pagination helpers.
        """
        headers = self._fix_headers(headers)
        url = self.prop.constructUrl(url)

        if bodyData is None:
            # Sending a content-type w/o the body might break some
            # servers. Maybe?
            if 'content-type' in headers:
                del headers['content-type']

        # TODO: Context manager
        requestBody = RequestBody(bodyData, headers)

        # Headers left over from a previous response may already say that
        # the quota is exhausted; wait before sending anything.
        if self.no_ratelimit_remaining():
            time.sleep(self.ratelimit_seconds_remaining())

        while True:
            conn = self.get_connection()
            conn.request(method, url, requestBody.process(), headers)
            response = conn.getresponse()
            status = response.status
            content = ResponseBody(response)
            self.headers = response.getheaders()

            conn.close()
            # The HTTP status is an integer; comparing it with the string
            # '403' never matched, so the retry below never ran.
            if status == 403 and self.no_ratelimit_remaining():
                time.sleep(self.ratelimit_seconds_remaining())
            else:
                data = content.processBody()
                if self.paginate and isinstance(data, list):
                    data.extend(self.get_additional_pages())
                return status, data

    def get_additional_pages(self):
        """Return the items of every page after the most recent response.

        Returns an empty list when the last response has no ``next`` link.
        """
        data = []
        url = self.get_next_link_url()
        if url:
            # NOTE(review): self.get is not defined in this class;
            # presumably it is inherited from Client and ends up calling
            # request(), which refreshes self.headers -- confirm.
            status, data = self.get(url)
            data.extend(self.get_additional_pages())
        return data

    def _get_header(self, name, default=None):
        """Look up a header of the last response, ignoring name case.

        Returns ``default`` when no response has been received yet or the
        header is absent.
        """
        raw_headers = getattr(self, 'headers', None) or []
        lowered = {
            str(key).lower(): value
            for key, value in dict(raw_headers).items()
        }
        return lowered.get(name.lower(), default)

    def no_ratelimit_remaining(self):
        """Return True when the last response reported zero remaining requests."""
        return int(self._get_header('X-RateLimit-Remaining', 1)) == 0

    def ratelimit_seconds_remaining(self):
        """Return the number of seconds until the rate limit resets.

        Never negative; returns 0 when the reset header is missing or the
        reset time has already passed.
        """
        ratelimit_reset = int(self._get_header('X-RateLimit-Reset', 0))
        return max(0, ratelimit_reset - time.time())

    def get_next_link_url(self):
        """Return the URL of the next page, or an empty string.

        Scans the headers of the last response for RFC 5988 ``Link``
        header fields and returns the URL of the first link whose ``rel``
        parameter is ``next``. Returns ``''`` when there is none.
        """
        # Adapted from parse_header_links in the requests library
        # (requests/utils.py).
        replace_chars = ' \'"'
        for header in getattr(self, 'headers', None) or []:
            if header[0].lower() != 'link':
                continue
            header_value = header[1].strip(replace_chars)
            if not header_value:
                # An empty Link header carries no links; keep looking.
                continue
            for val in re.split(r', *<', header_value):
                try:
                    url, params = val.split(';', 1)
                except ValueError:
                    url, params = val, ''
                link = {'url': url.strip('<> \'"')}
                for param in params.split(';'):
                    try:
                        # Split on the first '=' only so that parameter
                        # values containing '=' are kept intact.
                        key, param_value = param.split('=', 1)
                    except ValueError:
                        break
                    link[key.strip(replace_chars)] = (
                        param_value.strip(replace_chars))
                if link.get('rel') == 'next':
                    return link['url']
        return ''

0 commit comments

Comments
 (0)