diff --git a/ChangeLog.md b/ChangeLog.md index 3656f93..b4a4418 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -23,18 +23,21 @@ Upcoming Unscheduled ----------- -* Support encoding/serialization request bodies, analogous to the - decoding/de-serialization for response bodies which is done in the - Content class - - - This probably means reorganizing the Content class, perhaps adding - another level of structure - - Find a convenient way for the user to specify request-body - content-type. Maybe add a `content_type=` parameter to `put()` et - al? - - - Does GitHub support this? It should. And if so, we should use it - by default +* Support reusing TCP connections, and "pipelining" of requests, a la + RFC 2068, Sect 8.1, L2377 + + - The user must ask for pipelining, and supply a callback function + to be called after a response is received. + - Rename Client.request() -> Client.addRequest() (or such) + - Have Client.addRequest() check whether a persistent connection is + wanted, and save the entire request in a Client.pendingRequests + list in that case + - Create a new Client.sendRequests() method which the user may call + to send all requests up the pipeline. (It should work even if the + server does not support pipelining) + - Call the user-supplied callback whenever a response is received. + There are some concurrency issues here, and we may elect to call + the callback only after *all* responses are received. * Create a script to pack the basic module and any given set of service-specific classes as one file @@ -88,6 +91,23 @@ v2.0 * Support XML de-serialization. Python has (I think) built-in support for this +* Support encoding/serialization request bodies, analogous to the + decoding/de-serialization for response bodies which is done in the + Content class + + - This probably means reorganizing the Content class, perhaps adding + another level of structure + - Find a convenient way for the user to specify request-body + content-type. 
Maybe add a `content_type=` parameter to `put()` et + al? + + - Does GitHub support this? It should. And if so, we should use it + by default + +* Parse Content-Type header correctly; make a dict of the + parameters (Content.ctypeParameters) available to the media-type + handlers + v1.1.1 ------ diff --git a/README.md b/README.md index 991bd2c..83d2b87 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# The Agnostic Github API +# The Agnostic GitHub API *It doesn't know, and you don't care!* `agithub` is a REST API client tailored to https://api.github.com, with a transparent syntax which facilitates rapid prototyping. It's code is lightweight: easy to understand, modify, and integrate. It's most -salient feature is that it doesn't know the Github API — but +salient feature is that it doesn't know the GitHub API — but that doesn't matter, since it fully supports it *anyway*. While browsing the @@ -28,12 +28,12 @@ can read the docs and immediately know how to do the examples via ## Example App -1. First, instantiate a `Github` object, passing it your username and +1. First, instantiate a `GitHub` object, passing it your username and password if an authenticated session is desired. ```python - >>> from agithub import Github - >>> g = Github('user', 'pass') + >>> from agithub import GitHub + >>> g = GitHub('user', 'pass') ``` 2. When you make a request, the status and response body are passed back @@ -83,7 +83,7 @@ can read the docs and immediately know how to do the examples via You may find this useful — or not. -6. Finally, `agithub` knows nothing at all about the Github API, and it +6. Finally, `agithub` knows nothing at all about the GitHub API, and it won't second-guess you. ```python @@ -91,7 +91,7 @@ can read the docs and immediately know how to do the examples via (404, {'message': 'Not Found'}) ``` - The error message you get is directly from Github's API. This gives + The error message you get is directly from GitHub's API. 
This gives you all of the information you need to survey the situation. 7. If you need more information, the response headers of the previous @@ -113,7 +113,7 @@ crop up: 1. Networking Exceptions (from the `http` library). Catch these with `try .. catch` blocks, as you otherwise would. -2. Github API errors. These means you're doing something wrong with the +2. GitHub API errors. These mean you're doing something wrong with the API, and they are always evident in the response's status. The API considerately returns a helpful error message in the JSON body. diff --git a/agithub.py b/agithub.py index 5db3c6f..c344c7f 100644 --- a/agithub.py +++ b/agithub.py @@ -5,6 +5,8 @@ import re from functools import partial, update_wrapper +import xml.dom.minidom + import sys if sys.version_info[0:2] > (3,0): import http.client @@ -15,15 +17,15 @@ import urllib as urllib urllib.parse = urllib -VERSION = [1,2] +VERSION = [2,0] STR_VERSION = 'v' + '.'.join(str(v) for v in VERSION) # These headers are implicitly included in each request; however, each # can be explicitly overridden by the client code. (Used in Client # objects.) -_default_headers = { - #XXX: Header field names MUST be lowercase; this is not checked +DEFAULT_HEADERS = { 'user-agent': 'agithub/' + STR_VERSION + , 'content-type' : 'application/json' } class API(object): @@ -31,12 +33,12 @@ class API(object): The toplevel object, and the "entry-point" into the client API. Subclass this to develop an application for a particular REST API. - Model your __init__ after the Github example. + Model your __init__ after the GitHub example. ''' def __init__(self, *args, **kwargs): raise Exception ( 'Please subclass API and override __init__() to' - 'provide a ConnectionProperties object. See the Github' + 'provide a ConnectionProperties object. See the GitHub' ' class for an example' ) @@ -56,10 +58,10 @@ def __repr__(self): def getheaders(self): return self.client.headers -class Github(API): - '''The agnostic Github API. 
It doesn't know, and you don't care. - >>> from agithub import Github - >>> g = Github('user', 'pass') +class GitHub(API): + '''The agnostic GitHub API. It doesn't know, and you don't care. + >>> from agithub import GitHub + >>> g = GitHub('user', 'pass') >>> status, data = g.issues.get(filter='subscribed') >>> data ... [ list_, of, stuff ] @@ -78,7 +80,7 @@ class Github(API): That's all there is to it. (blah.post() should work, too.) - NOTE: It is up to you to spell things correctly. A Github object + NOTE: It is up to you to spell things correctly. A GitHub object doesn't even try to validate the url you feed it. On the other hand, it automatically supports the full API--so why should you care? ''' @@ -109,7 +111,7 @@ class RequestBuilder(object): You can use item access instead of attribute access. This is convenient for using variables\' values and required for numbers. - >>> Github('user','pass').whatever[1][x][y].post() + >>> GitHub('user','pass').whatever[1][x][y].post() To understand the method(...) calls, check out github.client.Client. 
''' @@ -118,7 +120,7 @@ def __init__(self, client): self.url = '' def __getattr__(self, key): - if key in self.client.http_methods: + if key in self.client.httpMethods: mfun = getattr(self.client, key) fun = partial(mfun, url=self.url) return update_wrapper(fun, mfun) @@ -138,7 +140,7 @@ def __repr__(self): return '%s: %s' % (self.__class__, self.url) class Client(object): - http_methods = ( + httpMethods = ( 'head', 'get', 'post', @@ -147,7 +149,7 @@ class Client(object): 'patch', ) - default_headers = {} + defaultHeaders = {} headers = None def __init__(self, username=None, @@ -165,13 +167,13 @@ def __init__(self, username=None, if password is None and token is None: raise TypeError("You need a password to authenticate as " + username) if password is not None and token is not None: - raise TypeError("You cannot use both password and oauth token authenication") + raise TypeError("You cannot use both password and OAuth token authentication") - self.auth_header = None + self.authHeader = None if password is not None: - self.auth_header = self.hash_pass(password) + self.authHeader = self.hashPassword(password) elif token is not None: - self.auth_header = 'Token %s' % token + self.authHeader = 'Token %s' % token def setConnectionProperties(self, props): ''' @@ -183,86 +185,92 @@ def setConnectionProperties(self, props): raise TypeError("Client.setConnectionProperties: Expected ConnectionProperties object") self.prop = props + self.defaultHeaders = DEFAULT_HEADERS.copy() if self.prop.extra_headers is not None: - self.default_headers = _default_headers.copy() - self.default_headers.update(self.prop.extra_headers) + self.defaultHeaders.update(self.prop.extra_headers) - # Enforce case restrictions on self.default_headers - tmp_dict = {} - for k,v in self.default_headers.items(): - tmp_dict[k.lower()] = v - self.default_headers = tmp_dict + # Enforce case restrictions on self.defaultHeaders + self.defaultHeaders = self.caseConvertHeaders(self.defaultHeaders) def head(self, 
url, headers={}, **params): - url += self.urlencode(params) + url += self.urlEncode(params) return self.request('HEAD', url, None, headers) def get(self, url, headers={}, **params): - url += self.urlencode(params) + url += self.urlEncode(params) return self.request('GET', url, None, headers) def post(self, url, body=None, headers={}, **params): - url += self.urlencode(params) - return self.request('POST', url, json.dumps(body), headers) + url += self.urlEncode(params) + return self.request('POST', url, body, headers) def put(self, url, body=None, headers={}, **params): - url += self.urlencode(params) - return self.request('PUT', url, json.dumps(body), headers) + url += self.urlEncode(params) + return self.request('PUT', url, body, headers) def delete(self, url, headers={}, **params): - url += self.urlencode(params) + url += self.urlEncode(params) return self.request('DELETE', url, None, headers) def patch(self, url, body=None, headers={}, **params): - """ - Do a http patch request on the given url with given body, headers and parameters - Parameters is a dictionary that will will be urlencoded - """ - url += self.urlencode(params) - return self.request(self.PATCH, url, json.dumps(body), headers) + ''' + Do a http patch request on the given url with given body, + headers and parameters. Parameters is a dictionary that will + be url-encoded + ''' + url += self.urlEncode(params) + return self.request(self.PATCH, url, body, headers) def request(self, method, url, body, headers): '''Low-level networking. All HTTP-method methods call this''' - headers = self._fix_headers(headers) + headers = self.updateWithDefaultHeaders(headers) + + if body is None: + # Sending a content-type header wo/body might break some + # servers. Is this far-fetched? 
+ del headers['content-type'] if self.username: - headers['authorization'] = self.auth_header + headers['authorization'] = self.authHeader - #TODO: Context manager - conn = self.get_connection() - conn.request(method, url, body, headers) + + reqBody = RequestBody(body, headers) + conn = self.getConnection() + conn.request(method, url, reqBody.process(), headers) response = conn.getresponse() status = response.status - content = Content(response) + resBody = ResponseBody(response) self.headers = response.getheaders() conn.close() - return status, content.processBody() + return status, resBody.process() - def _fix_headers(self, headers): + def caseConvertHeaders(self, headers): # Convert header names to a uniform case - tmp_dict = {} + tmpDict = {} for k,v in headers.items(): - tmp_dict[k.lower()] = v - headers = tmp_dict + tmpDict[k.lower()] = v + return tmpDict - # Add default headers (if unspecified) - for k,v in self.default_headers.items(): + def updateWithDefaultHeaders(self, headers): + # Add default headers (if absent) + headers = self.caseConvertHeaders(headers) + for k,v in self.defaultHeaders.items(): if k not in headers: headers[k] = v return headers - def urlencode(self, params): + def urlEncode(self, params): if not params: return '' - return '?' + urllib.parse.urlencode(params) + return '?' 
+ urllib.parse.urlEncode(params) - def hash_pass(self, password): - auth_str = ('%s:%s' % (self.username, password)).encode('utf-8') - return 'Basic '.encode('utf-8') + base64.b64encode(auth_str).strip() + def hashPassword(self, password): + authStr = ('%s:%s' % (self.username, password)).encode('utf-8') + return 'Basic '.encode('utf-8') + base64.b64encode(authStr).strip() - def get_connection(self): + def getConnection(self): if self.prop.secure_http: conn = http.client.HTTPSConnection(self.prop.api_url) elif self.username is None: @@ -275,70 +283,101 @@ def get_connection(self): return conn -class Content(object): +class Body(object): + '''Superclass for ResponseBody and RequestBody''' + + def parseContentType(self, ctype): + ''' + Parse the Content-Type header, returning the media-type and any + parameters + ''' + + if ctype is None: + self.mediatype = 'application/octet-stream' + self.ctypeParameters = { 'charset': 'ISO-8859-1' } + return + + params = ctype.split(';') + self.mediatype = params.pop(0).strip() + + # Parse parameters + if len(params) > 0: + params = map( lambda s : s.strip().split('=') + , params + ) + + paramDict = {} + for attribute, value in params: + # TODO: Find out if specifying an attribute multiple + # times is even okay, and how it should be handled + attribute = attribute.lower() + if attribute in paramDict: + if type(paramDict[attribute]) is not list: + # Convert singleton value to value-list + paramDict[attribute] = [paramDict[attribute]] + # Insert new value along with pre-existing ones + paramDict[attribute] += value + else: + # Insert singleton attribute value + paramDict[attribute] = value + self.ctypeParameters = paramDict + + else: + self.ctypeParameters = {} + + if 'charset' not in self.ctypeParameters: + self.ctypeParameters['charset'] = 'ISO-8859-1' + # NB: ISO-8859-1 is specified (RFC 2068) as the default + # charset in case none is provided + + def funMangledMediaType(self): + ''' + Mangle the media type into a suitable 
function name + ''' + return self.mediatype.replace('-','_').replace('/','_') + + +class ResponseBody(Body): ''' Decode a response from the server, respecting the Content-Type field ''' def __init__(self, response): self.response = response self.body = response.read() - (self.mediatype, self.encoding) = self.get_ctype() - - def get_ctype(self): - '''Split the content-type field into mediatype and charset''' - ctype = self.response.getheader('Content-Type') - - start = 0 - end = 0 - try: - end = ctype.index(';') - mediatype = ctype[:end] - except: - mediatype = 'x-application/unknown' - - try: - start = 8 + ctype.index('charset=', end) - end = ctype.index(';', start) - charset = ctype[start:end].rstrip() - except: - charset = 'ISO-8859-1' #TODO + self.parseContentType(self.response.getheader('Content-Type')) + self.encoding = self.ctypeParameters['charset'] - return (mediatype, charset) - - def decode_body(self): + def decodeBody(self): ''' Decode (and replace) self.body via the charset encoding specified in the content-type header ''' self.body = self.body.decode(self.encoding) - - def processBody(self): + def process(self): ''' - Retrieve the body of the response, encoding it into a usuable + Retrieve the body of the response, encoding it into a usable form based on the media-type (mime-type) ''' - handlerName = self.mangled_mtype() - handler = getattr(self, handlerName, self.x_application_unknown) + handlerName = self.funMangledMediaType() + handler = getattr( self, handlerName, + self.application_octet_stream + ) return handler() - def mangled_mtype(self): - ''' - Mangle the media type into a suitable function name - ''' - return self.mediatype.replace('-','_').replace('/','_') - - ## media-type handlers - def x_application_unknown(self): - '''Handler for unknown media-types''' + def application_octet_stream(self): + '''Handler for binary data and unknown media-types. 
Importantly, + it does absolutely no pre-processing of the body, which means it + will not mess it up. + ''' return self.body def application_json(self): '''Handler for application/json media-type''' - self.decode_body() + self.decodeBody() try: pybody = json.loads(self.body) @@ -351,7 +390,76 @@ def application_json(self): # XXX: This isn't technically correct, but we'll hope for the best. # Patches welcome! - # Insert new media-type handlers here + def application_xml(self): + self.decodeBody() + + try: + pybody = xml.dom.minidom.parseString(self.body) + except Exception: #TODO: What kind of exceptions? + pybody = self.body + + return pybody + + + text_xml = application_xml + # The difference between text/xml and application/xml is whether it + # is human-readable or not. For our purposes, there is no + # difference. RFC 3023, L270. + + # Insert new Response media-type handlers here + +class RequestBody(Body): + ''' + Encode a request body from the client, respecting the Content-Type + field + ''' + def __init__(self, body, headers): + self.body = body + self.headers = headers + self.parseContentType( + getattr(self.headers, 'content-type', None) + ) + self.encoding = self.ctypeParameters['charset'] + + def encodeBody(self): + ''' + Encode (and overwrite) self.body via the charset encoding + specified in the request headers + ''' + self.body = self.body.encode(self.encoding) + + def process(self): + ''' + Process the request body by applying a media-type specific + handler to it. Some media-types need charset encoding as well, + and it is up to the handler to do this by calling + self.encodeBody() + ''' + if self.body is None: + return None + + handlerName = self.funMangledMediaType() + handler = getattr( self, handlerName, + self.application_octet_stream + ) + return handler() + + ## media-type handlers + + def application_octet_stream(self): + '''Handler for binary data and unknown media-types. 
Importantly, + it does absolutely no pre-processing of the body, which means it + will not mess it up. + ''' + return self.body + + def application_json(self): + self.body = json.dumps(self.body) + self.encodeBody() + return self.body + + # Insert new Request media-type handlers here + class ConnectionProperties(object): __slots__ = ['api_url', 'secure_http', 'extra_headers'] diff --git a/test.py b/test.py old mode 100644 new mode 100755 index 8ef14b6..e8e796e --- a/test.py +++ b/test.py @@ -1,5 +1,6 @@ +#!/usr/bin/env python3 from __future__ import print_function -from agithub import Github +from agithub import GitHub ## # Test harness @@ -136,7 +137,7 @@ def yesno(ans): ### if __name__ == '__main__': - anonSession = initAnonymousSession(Github) + anonSession = initAnonymousSession(GitHub) authSession = None ans = input( @@ -148,7 +149,7 @@ def yesno(ans): username = input('Username: ') password = input ('Password (plain text): ') authSession = initAuthenticatedSession( - Github + GitHub , username=username, password=password )