Merge pull request #16 from soda480/0.1.2

soda480 · web-flow · commit b1dad31e16c0 · 2021-09-15T06:49:24.000-07:00
Add graphql api
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -8,7 +8,7 @@ on:
       - master
 jobs:
   build:
-    runs-on: ubuntu-16.04
+    runs-on: ubuntu-20.04
     container: python:3.6-alpine
 
     steps:
diff --git a/Dockerfile b/Dockerfile
@@ -15,24 +15,16 @@
 #
 
 FROM python:3.6-alpine AS build-image
-
 ENV PYTHONDONTWRITEBYTECODE 1
-
-WORKDIR /github3api
-
-COPY . /github3api/
-
+WORKDIR /code
+COPY . /code/
 RUN pip install pybuilder==0.11.17
 RUN pyb install_dependencies
 RUN pyb install
 
 
 FROM python:3.6-alpine
-
 ENV PYTHONDONTWRITEBYTECODE 1
-
 WORKDIR /opt/github3api
-
-COPY --from=build-image /github3api/target/dist/github3api-*/dist/github3api-*.tar.gz /opt/github3api
-
+COPY --from=build-image /code/target/dist/github3api-*/dist/github3api-*.tar.gz /opt/github3api
 RUN pip install github3api-*.tar.gz
diff --git a/README.md b/README.md
@@ -1,10 +1,12 @@
+# github3api #
 [![GitHub Workflow Status](https://github.com/soda480/github3api/workflows/build/badge.svg)](https://github.com/soda480/github3api/actions)
 [![Code Coverage](https://codecov.io/gh/soda480/github3api/branch/master/graph/badge.svg)](https://codecov.io/gh/soda480/github3api)
 [![Code Grade](https://www.code-inspector.com/project/13337/status/svg)](https://frontend.code-inspector.com/project/13337/dashboard)
 [![PyPI version](https://badge.fury.io/py/github3api.svg)](https://badge.fury.io/py/github3api)
 
-# github3api #
-An advanced REST client for the GitHub API. It is a subclass of [rest3client](https://pypi.org/project/rest3client/) tailored for the GitHub API with special optional directives for GET requests that can return all pages from an endpoint or return a generator that can be iterated over. By default all requests will be retried if ratelimit request limit is reached.
+An advanced REST client for the GitHub API. It is a subclass of [rest3client](https://pypi.org/project/rest3client/) tailored for the GitHub API with special optional directives for GET requests that can return all pages from an endpoint or return a generator that can be iterated over (for paged requests). By default, all requests are retried if the GitHub rate limit is reached.
+
+It also supports executing GraphQL queries, including paged queries; GraphQL queries are retried as well if GraphQL rate limiting occurs.
 
 
 ### Installation ###
@@ -77,6 +79,58 @@ print(client.total('/user/repos'))
 6218
 ```
 
+`graphql` - execute graphql query
+```python
+query = """
+  query($query:String!, $page_size:Int!) {
+    search(query: $query, type: REPOSITORY, first: $page_size) {
+      repositoryCount
+      edges {
+        node {
+          ... on Repository {
+            nameWithOwner
+          }
+        }
+      }
+    }
+  }
+"""
+variables = {"query": "org:edgexfoundry", "page_size":100}
+client.graphql(query, variables)
+```
+
+`graphql paging` - execute paged graphql query
+```python
+query = """
+  query ($query: String!, $page_size: Int!, $cursor: String!) {
+    search(query: $query, type: REPOSITORY, first: $page_size, after: $cursor) {
+      repositoryCount
+      pageInfo {
+        endCursor
+        hasNextPage
+      }
+      edges {
+        cursor
+        node {
+          ... on Repository {
+            nameWithOwner
+          }
+        }
+      }
+    }
+  }
+"""
+variables = {"query": "org:edgexfoundry", "page_size":100}
+for page in client.graphql(query, variables, page=True, keys='data.search'):
+    for repo in page:
+        print(repo['node']['nameWithOwner'])
+```
+
+For GraphQL paged queries:
+- the query should include the necessary pageInfo and cursor attributes
+- the `keys` method argument is a dot-notated string that is used to access the resulting dictionary response object
+- the query is retried every 60 seconds (for up to an hour) if a rate limit occurs
+
 ### Projects using `github3api` ###
 
 * [edgexfoundry/sync-github-labels](https://github.com/edgexfoundry/cd-management/tree/git-label-sync) A script that synchronizes GitHub labels and milestones
@@ -111,7 +165,7 @@ docker container run \
 -it \
 -e http_proxy \
 -e https_proxy \
--v $PWD:/github3api \
+-v $PWD:/code \
 github3api:latest \
 /bin/sh
 ```
diff --git a/build.py b/build.py
@@ -30,7 +30,7 @@
 authors = [Author('Emilio Reyes', 'emilio.reyes@intel.com')]
 summary = 'An advanced REST client for the GitHub API'
 url = 'https://github.com/soda480/github3api'
-version = '0.1.1'
+version = '0.1.2'
 default_task = [
     'clean',
     'analyze',
diff --git a/src/main/python/github3api/githubapi.py b/src/main/python/github3api/githubapi.py
@@ -18,6 +18,7 @@
 from os import getenv
 from datetime import datetime
 
+from retrying import retry
 from rest3client import RESTclient
 from requests.exceptions import HTTPError
 from requests.exceptions import ChunkedEncodingError
@@ -29,6 +30,19 @@
 HOSTNAME = 'api.github.com'
 VERSION = 'v3'
 DEFAULT_PAGE_SIZE = 30
+DEFAULT_GRAPHQL_PAGE_SIZE = 100
+
+
+class GraphqlRateLimitError(Exception):
+    """ GraphQL Rate Limit Error
+    """
+    pass
+
+
+class GraphqlError(Exception):
+    """ GraphQL Error
+    """
+    pass
 
 
 class GitHubAPI(RESTclient):
@@ -221,15 +235,82 @@ def retry_ratelimit_error(exception):
         return False
 
     @staticmethod
-    def _retry_chunkedencodingerror_error(exception):
-        """ return True if exception is ChunkedEncodingError, False otherwise
-            retry:
-                wait_fixed:10000
-                stop_max_attempt_number:120
+    def clear_cursor(query, cursor):
+        """ return query with all cursor references removed if no cursor
+        """
+        if not cursor:
+            query = query.replace('after: $cursor', '')
+            query = query.replace('$cursor: String!', '')
+        return query
+
+    @staticmethod
+    def sanitize_query(query):
+        """ sanitize query
+        """
+        return query.replace('\n', ' ').replace('  ', '').strip()
+
+    @staticmethod
+    def raise_if_error(response):
+        """ raise GraphqlRateLimitError if a RATE_LIMITED error exists in errors, otherwise raise GraphqlError for the first error
+        """
+        if 'errors' in response:
+            logger.debug(f'errors detected in graphql response: {response}')
+            for error in response['errors']:
+                if error.get('type', '') == 'RATE_LIMITED':
+                    raise GraphqlRateLimitError(error.get('message', ''))
+            raise GraphqlError(response['errors'][0]['message'])
+
+    @staticmethod
+    def get_value(data, keys):
+        """ return value represented by keys dot notated string from data dictionary
+        """
+        if '.' in keys:
+            key, rest = keys.split('.', 1)
+            if key in data:
+                return GitHubAPI.get_value(data[key], rest)
+            raise KeyError(f'dictionary does not have key {key}')
+        else:
+            return data[keys]
+
+    def _get_graphql_page(self, query, variables, keys):
+        """ return generator that yields page from graphql response
         """
-        logger.debug(f"checking if '{type(exception).__name__}' exception is a ChunkedEncodingError error")
-        if isinstance(exception, ChunkedEncodingError):
-            logger.info('ratelimit error encountered - retrying request in 10 seconds')
+        variables['page_size'] = DEFAULT_GRAPHQL_PAGE_SIZE
+        variables['cursor'] = ''
+        while True:
+            updated_query = GitHubAPI.clear_cursor(query, variables['cursor'])
+            response = self.post('/graphql', json={'query': updated_query, 'variables': variables})
+            GitHubAPI.raise_if_error(response)
+            yield GitHubAPI.get_value(response, f'{keys}.edges')
+
+            page_info = GitHubAPI.get_value(response, f'{keys}.pageInfo')
+            has_next_page = page_info['hasNextPage']
+            if not has_next_page:
+                logger.debug('no more pages')
+                break
+            variables['cursor'] = page_info['endCursor']
+
+    def check_graphqlratelimiterror(exception):
+        """ return True if exception is a GraphqlRateLimitError or a TypeError, False otherwise
+        """
+        logger.debug(f"checking if '{type(exception).__name__}' exception is a GraphqlRateLimitError")
+        if isinstance(exception, (GraphqlRateLimitError, TypeError)):
+            logger.debug('exception is a GraphqlRateLimitError - retrying request in 60 seconds')
             return True
-        logger.debug(f'exception is not a ratelimit error: {exception}')
+        logger.debug(f'exception is not a GraphqlRateLimitError: {exception}')
         return False
+
+    @retry(retry_on_exception=check_graphqlratelimiterror, wait_fixed=60000, stop_max_attempt_number=60)
+    def graphql(self, query, variables, page=False, keys=None):
+        """ execute graphql query and return response or paged response if page is True
+        """
+        query = GitHubAPI.sanitize_query(query)
+        if page:
+            response = self._get_graphql_page(query, variables, keys)
+        else:
+            updated_query = GitHubAPI.clear_cursor(query, variables.get('cursor'))
+            response = self.post('/graphql', json={'query': updated_query, 'variables': variables})
+            GitHubAPI.raise_if_error(response)
+        return response
+
+    check_graphqlratelimiterror = staticmethod(check_graphqlratelimiterror)
diff --git a/src/unittest/python/test_githubapi.py b/src/unittest/python/test_githubapi.py