From 4d6dd41cc8cb323aee6af3c69e7da45eed1b0c55 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 03:45:01 +0100 Subject: [PATCH 01/15] Refactored DB utils to keep exactly what we are using for our purposes at FjellTopp. --- ckanext/googleanalytics/utils/db.py | 183 ++++------------------ ckanext/googleanalytics/utils/ga.py | 66 +++++++- ckanext/googleanalytics/utils/numerize.py | 2 +- 3 files changed, 90 insertions(+), 161 deletions(-) diff --git a/ckanext/googleanalytics/utils/db.py b/ckanext/googleanalytics/utils/db.py index 2d0b9ad..6058545 100644 --- a/ckanext/googleanalytics/utils/db.py +++ b/ckanext/googleanalytics/utils/db.py @@ -7,12 +7,6 @@ import ckan.model as model from ckan.lib.base import * -from . import ( - RESOURCE_URL_REGEX, PACKAGE_URL, - _resource_url_tag, - _recent_view_days -) - log = logging.getLogger(__name__) cached_tables = {} @@ -34,6 +28,13 @@ def init_tables(): Column("visits_recently", Integer), Column("visits_ever", Integer), ) + url_stats = Table( + "url_stats", + metadata, + Column("url_id", String(512), primary_key=True), + Column("visits_recently", Integer), + Column("visits_ever", Integer), + ) metadata.create_all(model.meta.engine) @@ -68,167 +69,37 @@ def _update_visits(table_name, item_id, recently, ever): connection.execute(stats.insert().values(**values)) -def update_resource_visits(resource_id, recently, ever): - return _update_visits("resource_stats", resource_id, recently, ever) - - def update_package_visits(package_id, recently, ever): return _update_visits("package_stats", package_id, recently, ever) -def get_resource_visits_for_url(url): - connection = model.Session.connection() - count = connection.execute( - text( - """SELECT visits_ever FROM resource_stats, resource - WHERE resource_id = resource.id - AND resource.url = :url""" - ), - url=url, - ).fetchone() - return count and count[0] or "" - - -def get_top_packages(limit=20): - """ get_top_packages is broken, and needs to be rewritten to work with - CKAN 2.*. This is because ckan.authz has been removed in CKAN 2.* - - See commit ffa86c010d5d25fa1881c6b915e48f3b44657612 - """ - items = [] - # caveat emptor: the query below will not filter out private - # or deleted datasets (TODO) - q = model.Session.query(model.Package) - connection = model.Session.connection() - package_stats = get_table("package_stats") - s = select( - [ - package_stats.c.package_id, - package_stats.c.visits_recently, - package_stats.c.visits_ever, - ] - ).order_by(package_stats.c.visits_ever.desc()) - res = connection.execute(s).fetchmany(limit) - for package_id, recent, ever in res: - item = q.filter(text("package.id = '%s'" % package_id)) - if not item.count(): - continue - items.append((item.first(), recent, ever)) - return items - - -def get_top_resources(limit=20): - items = [] - connection = model.Session.connection() - resource_stats = get_table("resource_stats") - s = select( - [ - resource_stats.c.resource_id, - resource_stats.c.visits_recently, - resource_stats.c.visits_ever, - ] - ).order_by(resource_stats.c.visits_recently.desc()) - res = connection.execute(s).fetchmany(limit) - for resource_id, recent, ever in res: - item = model.Session.query(model.Resource).filter( - "resource.id = '%s'" % resource_id - ) - if not item.count(): - continue - items.append((item.first(), recent, ever)) - return items +def update_resource_visits(resource_id, recently, ever): + return _update_visits("resource_stats", resource_id, recently, ever) -def get_resource_stat(resource_id): +def update_url_visits(url_id, recently, ever): + return _update_visits("url_stats", url_id, recently, ever) + + +def _get_stats(table_name, item_id): connection = model.Session.connection() - resource_stats = get_table("resource_stats") + stats = get_table(table_name) + id_col_name = "%s_id" % table_name[: -len("_stats")] + id_col = getattr(stats.c, id_col_name) s = select( - [resource_stats.c.visits_ever] - ).where(resource_stats.c.resource_id == resource_id) + [stats.c.visits_ever] + ).where(id_col == item_id) res = connection.execute(s).fetchone() return res and res or [0] + def get_package_stat(package_id): - connection = model.Session.connection() - package_stats = get_table("package_stats") - s = select( - [package_stats.c.visits_ever] - ).where(package_stats.c.package_id == package_id) - res = connection.execute(s).fetchone() - return res and res or [0] + return _get_stats("package_stats", package_id) -def save_packages(packages_data, summary_date): - engine = model.meta.engine - # clear out existing data before adding new - sql = ( - """DELETE FROM tracking_summary - WHERE tracking_date='%s'; """ - % summary_date - ) - engine.execute(sql) - - for url, count in list(packages_data.items()): - # If it matches the resource then we should mark it as a resource. - # For resources we don't currently find the package ID. - if RESOURCE_URL_REGEX.match(url): - tracking_type = "resource" - else: - tracking_type = "page" - - sql = """INSERT INTO tracking_summary - (url, count, tracking_date, tracking_type) - VALUES (%s, %s, %s, %s);""" - engine.execute(sql, url, count, summary_date, tracking_type) - - # get ids for dataset urls - sql = """UPDATE tracking_summary t - SET package_id = COALESCE( - (SELECT id FROM package p WHERE t.url = %s || p.name) - ,'~~not~found~~') - WHERE t.package_id IS NULL AND tracking_type = 'page';""" - engine.execute(sql, PACKAGE_URL) - - # get ids for dataset edit urls which aren't captured otherwise - sql = """UPDATE tracking_summary t - SET package_id = COALESCE( - (SELECT id FROM package p WHERE t.url = %s || p.name) - ,'~~not~found~~') - WHERE t.package_id = '~~not~found~~' AND tracking_type = 'page';""" - engine.execute(sql, "%sedit/" % PACKAGE_URL) - - # update summary totals for resources - sql = """UPDATE tracking_summary t1 - SET running_total = ( - SELECT sum(count) - FROM tracking_summary t2 - WHERE t1.url = t2.url - AND t2.tracking_date <= t1.tracking_date - ) + t1.count - ,recent_views = ( - SELECT sum(count) - FROM tracking_summary t2 - WHERE t1.url = t2.url - AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - %s - ) + t1.count - WHERE t1.running_total = 0 AND tracking_type = 'resource';""" - engine.execute(sql, _recent_view_days()) - - # update summary totals for pages - sql = """UPDATE tracking_summary t1 - SET running_total = ( - SELECT sum(count) - FROM tracking_summary t2 - WHERE t1.package_id = t2.package_id - AND t2.tracking_date <= t1.tracking_date - ) + t1.count - ,recent_views = ( - SELECT sum(count) - FROM tracking_summary t2 - WHERE t1.package_id = t2.package_id - AND t2.tracking_date <= t1.tracking_date AND t2.tracking_date >= t1.tracking_date - %s - ) + t1.count - WHERE t1.running_total = 0 AND tracking_type = 'page' - AND t1.package_id IS NOT NULL - AND t1.package_id != '~~not~found~~';""" - engine.execute(sql, _recent_view_days()) +def get_resource_stat(resource_id): + return _get_stats("resource_stats", resource_id) + + +def get_url_stat(url_id): + return _get_stats("url_stats", url_id) diff --git a/ckanext/googleanalytics/utils/ga.py b/ckanext/googleanalytics/utils/ga.py index 2662784..68d0506 100644 --- a/ckanext/googleanalytics/utils/ga.py +++ b/ckanext/googleanalytics/utils/ga.py @@ -1,15 +1,15 @@ import httplib2 import logging -from oauth2client.service_account import ServiceAccountCredentials from ckan.exceptions import CkanVersionException from googleapiclient.discovery import build -import ckan.plugins.toolkit as tk +from oauth2client.service_account import ServiceAccountCredentials import ckan.model as model +import ckan.plugins.toolkit as tk from . import ( - RESOURCE_URL_REGEX, PACKAGE_URL, - _resource_url_tag, + RESOURCE_URL_REGEX, + PACKAGE_URL, _recent_view_days, db as db_utils ) @@ -29,6 +29,64 @@ def init_service(credentials_path): return service +def get_urls_data(service): + urls = {} + property_id = tk.config.get("googleanalytics.property_id") + dates = { + "recent": {"startDate": "{}daysAgo".format(_recent_view_days()), "endDate": "today"}, + "ever": {"startDate": "2024-01-01", "endDate": "today"} + } + + for date_name, date in dates.items(): + request_body = { + "requests": [{ + "dateRanges": [date], + "metrics": [{"name": "eventCount"}], + "dimensions": [{"name": "eventName"}, {"name": "pagePath"}] + }] + } + + response = service.properties().batchRunReports( + body=request_body, property='properties/{}'.format(property_id) + ).execute() + + for report in response.get('reports', []): + for row in report.get('rows', []): + event_category = row['dimensionValues'][0].get('value', '') + event_label = row['dimensionValues'][1].get('value', '') + event_count = row['metricValues'][0].get('value', 0) + + if event_category == "page_view": + url = event_label + views = event_count + count = 0 + if url in urls and date_name in urls[url]: + count += urls[url][date_name] + urls.setdefault(url, {})[date_name] = int(views) + count + + return urls + + +def save_urls_data(urls_data): + """Save tuples of urls_data to the database""" + urls = {} + for url_id, visits in urls_data.items(): + if url_id in urls: + urls[url_id]["recent"] += visits.get("recent", 0) + urls[url_id]["ever"] += visits.get("ever", 0) + else: + urls[url_id] = { + "recent": visits.get("recent", 0), + "ever": visits.get("ever", 0) + } + + for url_id, visits in urls.items(): + db_utils.update_url_visits(url_id, visits["recent"], visits["ever"]) + log.info("Updated URL path %s with %s visits" % (url_id, visits)) + + model.Session.commit() + + def get_packages_data(service): packages = {} property_id = tk.config.get("googleanalytics.property_id") diff --git a/ckanext/googleanalytics/utils/numerize.py b/ckanext/googleanalytics/utils/numerize.py index 5b003c7..beaf9ba 100644 --- a/ckanext/googleanalytics/utils/numerize.py +++ b/ckanext/googleanalytics/utils/numerize.py @@ -1,6 +1,6 @@ def numerize(num): ''' - Shows a number is a human readable format. + Shows a number in a human readable format. Source: https://stackoverflow.com/a/45846841 ''' num = float('{:.3g}'.format(num)) From 861350b6f501fd9cc23083438b2536e96fe6023e Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 03:45:51 +0100 Subject: [PATCH 02/15] Added instructions to fetch page views counts. --- ckanext/googleanalytics/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ckanext/googleanalytics/cli.py b/ckanext/googleanalytics/cli.py index c3f3c96..c227c1e 100644 --- a/ckanext/googleanalytics/cli.py +++ b/ckanext/googleanalytics/cli.py @@ -39,6 +39,14 @@ def load(credentials, start_date): in a local database """ service = ga_utils.init_service(credentials) + + # Get and save resource and dataset download information packages_data = ga_utils.get_packages_data(service) ga_utils.save_packages_data(packages_data) - log.info("Saved %s records from google" % len(packages_data)) + + # Get and save url views information + urls_data = ga_utils.get_urls_data(service) + ga_utils.save_urls_data(urls_data) + + log.info("Saved %s packages visits from google" % len(packages_data)) + log.info("Saved %s urls visits from google" % len(urls_data)) From 8aa1a206d6d7e65934adbd09c2f32a8bf04ce685 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 03:46:36 +0100 Subject: [PATCH 03/15] Removed unused controller. --- ckanext/googleanalytics/controller.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ckanext/googleanalytics/controller.py b/ckanext/googleanalytics/controller.py index a01a5d5..0121e4d 100644 --- a/ckanext/googleanalytics/controller.py +++ b/ckanext/googleanalytics/controller.py @@ -22,13 +22,6 @@ log = logging.getLogger("ckanext.googleanalytics") -class GAController(BaseController): - def view(self): - # get package objects corresponding to popular GA content - c.top_resources = db_utils.get_top_resources(limit=10) - return render("summary.html") - - class GAApiController(ApiController): # intercept API calls to record via google analytics def _post_analytics( From a6be2801257f24fd4359d5d05705f32be9c9fbaf Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 04:04:47 +0100 Subject: [PATCH 04/15] Added an action and a helper to fetch URL statistics. --- ckanext/googleanalytics/actions.py | 14 ++++++++++++++ ckanext/googleanalytics/helpers.py | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/ckanext/googleanalytics/actions.py b/ckanext/googleanalytics/actions.py index d25e1c3..6450094 100644 --- a/ckanext/googleanalytics/actions.py +++ b/ckanext/googleanalytics/actions.py @@ -39,6 +39,20 @@ def package_stat(context, data_dict): return json.dumps(result) +@toolkit.side_effect_free +def url_stat(context, data_dict): + ''' + Fetch url stats + ''' + url_id = data_dict['url_id'] + result = 0 + try: + result = db_utils.get_url_stat(url_id)[0] + except Exception as e: + log.error("URL not found: {}".format(e)) + return json.dumps(result) + + def download_package_stat(context, data_dict): ''' Download package stats from Google analytics into the local database diff --git a/ckanext/googleanalytics/helpers.py b/ckanext/googleanalytics/helpers.py index 53d8fde..5057531 100644 --- a/ckanext/googleanalytics/helpers.py +++ b/ckanext/googleanalytics/helpers.py @@ -10,3 +10,8 @@ def get_package_stats(package_id): def get_resource_stats(resource_id): resource_stat = toolkit.get_action('resource_stats')({}, {'resource_id': resource_id}) return numerize(int(resource_stat)) + + +def get_url_stats(url_id): + url_stat = toolkit.get_action('url_stats')({}, {'url_id': url_id}) + return numerize(int(url_stat)) From c32f48d97f3a4565027b5d40d599b553c914e733 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 04:32:00 +0100 Subject: [PATCH 05/15] Format and augment setup.py --- setup.py | 82 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/setup.py b/setup.py index 6da33ac..f612876 100644 --- a/setup.py +++ b/setup.py @@ -1,47 +1,87 @@ import os from io import open from setuptools import setup, find_packages -HERE = os.path.dirname(__file__) -version = "2.0.7" +HERE = os.path.dirname(__file__) extras_require = {} _extras_groups = [ - ('requirements', 'requirements.txt'), + ("requirements", "requirements.txt"), ] for group, filepath in _extras_groups: with open(os.path.join(HERE, filepath), 'r') as f: extras_require[group] = f.readlines() # Get the long description from the relevant file -with open(os.path.join(HERE, 'README.md'), encoding='utf-8') as f: +with open(os.path.join(HERE, "README.md"), encoding="utf-8") as f: long_description = f.read() setup( name="ckanext-googleanalytics", - version=version, + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # http://packaging.python.org/en/latest/tutorial.html#version + version="1.0.0", + description="Add GA tracking and reporting to CKAN instance", long_description=long_description, long_description_content_type="text/markdown", - classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers - keywords="", - author="Seb Bacon", - author_email="seb.bacon@gmail.com", - url="", - license="", - packages=find_packages(exclude=["ez_setup", "examples", "tests"]), + + # The project's main homepage. + url='https://github.com/fjelltopp/ckanext-googleanalytics4', + + # Author details + author="Seb Bacon, Ntwali Bashige", + author_email="seb.bacon@gmail.com, ntwali.bashige@gmail.com", + + # See http://pypi.python.org/pypi?%3Aaction=list_classifiers + classifiers=[ + # How mature is this project? Common values are + # 3 - Alpha + # 4 - Beta + # 5 - Production/Stable + "Development Status :: 4 - Beta", + + # Pick your license as you wish (should match "license" above) + "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", + + # Specify the Python versions you support here. In particular, ensure + # that you indicate whether you support Python 2, Python 3 or both. + "Programming Language :: Python :: 3.7", + ], + + # What does your project relate to? + keywords="CKAN, Google Analytics 4", + + # You can just specify the packages manually here if your project is + # simple. Or you can use find_packages(). + packages=find_packages(exclude=["contrib", "docs", "tests*"]), namespace_packages=["ckanext", "ckanext.googleanalytics"], + + install_requires=[ + # CKAN extensions should not list dependencies here, but in a separate + # ``requirements.txt`` file. + # + # http://docs.ckan.org/en/latest/extensions/best-practices.html + # add-third-party-libraries-to-requirements-txt + ], + extras_require=extras_require, include_package_data=True, zip_safe=False, - install_requires=[], - extras_require=extras_require, + + # Although 'package_data' is the preferred approach, in some case you may + # need to place data files outside of your packages. + # see http://docs.python.org/3.4/distutils/setupscript.html + # installing-additional-files + # In this case, 'data_file' will be installed into '/my_data' + data_files=[], + + # To provide executable scripts, use entry points in preference to the + # "scripts" keyword. Entry points provide cross-platform support and allow + # pip to create the appropriate form of executable for the target platform. entry_points=""" [ckan.plugins] - # Add plugins here, eg - googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin - - [paste.paster_command] - loadanalytics = ckanext.googleanalytics.commands:LoadAnalytics - initdb = ckanext.googleanalytics.commands:InitDB - """, + googleanalytics=ckanext.googleanalytics.plugin:GoogleAnalyticsPlugin + """, ) From e89829ed5310add9ec635c16b89bbf17d247f48d Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 05:19:13 +0100 Subject: [PATCH 06/15] Deleted unused files. --- ckanext/googleanalytics/controller.py | 120 -------------------------- ckanext/googleanalytics/gasnippet.py | 25 ------ 2 files changed, 145 deletions(-) delete mode 100644 ckanext/googleanalytics/controller.py delete mode 100644 ckanext/googleanalytics/gasnippet.py diff --git a/ckanext/googleanalytics/controller.py b/ckanext/googleanalytics/controller.py deleted file mode 100644 index 0121e4d..0000000 --- a/ckanext/googleanalytics/controller.py +++ /dev/null @@ -1,120 +0,0 @@ -from __future__ import absolute_import -import hashlib -import logging - -from ckan.lib.base import BaseController, c, render, request -from ckan.exceptions import CkanVersionException -from ckan.controllers.api import ApiController -from paste.util.multidict import MultiDict -import ckan.plugins.toolkit as tk -from utils import db as db_utils -import ckan.logic as logic -from pylons import config -from . import plugin - -try: - tk.requires_ckan_version("2.9") -except CkanVersionException: - pass -else: - from builtins import str - -log = logging.getLogger("ckanext.googleanalytics") - - -class GAApiController(ApiController): - # intercept API calls to record via google analytics - def _post_analytics( - self, user, request_obj_type, request_function, request_id - ): - if config.get("googleanalytics.id"): - data_dict = { - "v": 1, - "tid": config.get("googleanalytics.id"), - "cid": hashlib.md5(user).hexdigest(), - # customer id should be obfuscated - "t": "event", - "dh": c.environ["HTTP_HOST"], - "dp": c.environ["PATH_INFO"], - "dr": c.environ.get("HTTP_REFERER", ""), - "ec": "CKAN API Request", - "ea": request_obj_type + request_function, - "el": request_id, - } - plugin.GoogleAnalyticsPlugin.analytics_queue.put(data_dict) - - def action(self, logic_function, ver=None): - try: - function = logic.get_action(logic_function) - side_effect_free = getattr(function, "side_effect_free", False) - request_data = self._get_request_data( - try_url_params=side_effect_free - ) - if isinstance(request_data, dict): - id = request_data.get("id", "") - if "q" in request_data: - id = request_data["q"] - if "query" in request_data: - id = request_data["query"] - self._post_analytics(c.user, logic_function, "", id) - except Exception as e: - log.debug(e) - pass - return ApiController.action(self, logic_function, ver) - - def list(self, ver=None, register=None, subregister=None, id=None): - self._post_analytics( - c.user, - register + ("_" + str(subregister) if subregister else ""), - "list", - id, - ) - return ApiController.list(self, ver, register, subregister, id) - - def show( - self, ver=None, register=None, subregister=None, id=None, id2=None - ): - self._post_analytics( - c.user, - register + ("_" + str(subregister) if subregister else ""), - "show", - id, - ) - return ApiController.show(self, ver, register, subregister, id, id2) - - def update( - self, ver=None, register=None, subregister=None, id=None, id2=None - ): - self._post_analytics( - c.user, - register + ("_" + str(subregister) if subregister else ""), - "update", - id, - ) - return ApiController.update(self, ver, register, subregister, id, id2) - - def delete( - self, ver=None, register=None, subregister=None, id=None, id2=None - ): - self._post_analytics( - c.user, - register + ("_" + str(subregister) if subregister else ""), - "delete", - id, - ) - return ApiController.delete(self, ver, register, subregister, id, id2) - - def search(self, ver=None, register=None): - id = None - try: - params = MultiDict(self._get_search_params(request.params)) - if "q" in list(params.keys()): - id = params["q"] - if "query" in list(params.keys()): - id = params["query"] - except ValueError as e: - log.debug(str(e)) - pass - self._post_analytics(c.user, register, "search", id) - - return ApiController.search(self, ver, register) diff --git a/ckanext/googleanalytics/gasnippet.py b/ckanext/googleanalytics/gasnippet.py deleted file mode 100644 index 8a12f38..0000000 --- a/ckanext/googleanalytics/gasnippet.py +++ /dev/null @@ -1,25 +0,0 @@ -header_code = """ - -""" - -footer_code = """ - -""" - -download_style = """ - -""" From bbc04571d06268b051cf13b6e55699bd12980f41 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 05:19:52 +0100 Subject: [PATCH 07/15] Small formatting. --- ckanext/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/__init__.py b/ckanext/__init__.py index ff53016..4df9b7f 100644 --- a/ckanext/__init__.py +++ b/ckanext/__init__.py @@ -1,4 +1,4 @@ -# this is a namespace package +# This is a namespace package try: import pkg_resources From 821122effc155595e4b4a765c5c04105e6552a79 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 05:20:44 +0100 Subject: [PATCH 08/15] Removed useless test. --- tests/test_general.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/test_general.py b/tests/test_general.py index 3c08fe6..c958376 100644 --- a/tests/test_general.py +++ b/tests/test_general.py @@ -74,16 +74,6 @@ def test_analytics_snippet(self): ) assert code in response.body - def test_top_packages(self): - command = LoadAnalytics("loadanalytics") - command.TEST_HOST = MockClient("localhost", 6969) - command.CONFIG = self.config - command.run([]) - packages = db_utils.get_top_packages() - resources = db_utils.get_top_resources() - self.assertEquals(packages[0][1], 2) - self.assertEquals(resources[0][1], 4) - def test_download_count_inserted(self): command = LoadAnalytics("loadanalytics") command.TEST_HOST = MockClient("localhost", 6969) From cab1fe57722ed5b978fa6ceeb48a728020b958d0 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 05:26:16 +0100 Subject: [PATCH 09/15] Refactoring... --- ckanext/googleanalytics/plugin/__init__.py | 54 +++++++++++++--------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/ckanext/googleanalytics/plugin/__init__.py b/ckanext/googleanalytics/plugin/__init__.py index 984c503..410b50b 100644 --- a/ckanext/googleanalytics/plugin/__init__.py +++ b/ckanext/googleanalytics/plugin/__init__.py @@ -6,7 +6,7 @@ import json import ast -from ckanext.googleanalytics.actions import resource_stat , package_stat, download_package_stat +from ckanext.googleanalytics.actions import resource_stat , package_stat, url_stat, download_package_stat from ckanext.googleanalytics.plugin.flask_plugin import GAMixinPlugin import ckanext.googleanalytics.helpers as googleanalytics_helpers from six.moves.urllib.parse import urlencode @@ -46,24 +46,24 @@ def run(self): class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin): + p.implements(p.IActions) p.implements(p.IConfigurable) p.implements(p.IConfigurer, inherit=True) + p.implements(p.IPackageController, inherit=True) + p.implements(p.IResourceController, inherit=True) p.implements(p.ITemplateHelpers) - p.implements(p.IActions) + # IActions def get_actions(self): return { - 'resource_stats': resource_stat, - 'package_stats': package_stat, - 'download_package_stats': download_package_stat + "resource_stats": resource_stat, + "package_stats": package_stat, + "url_stats": url_stat, + "download_package_stats": download_package_stat } + # IConfigurable def configure(self, config): - """Load config settings for this extension from config file. - - See IConfigurable. - - """ if "googleanalytics.measurement_id" not in config: msg = "Missing googleanalytics.measurement_id in config. One must be set." raise GoogleAnalyticsException(msg) @@ -119,25 +119,34 @@ def configure(self, config): t.setDaemon(True) t.start() + # IConfigurer def update_config(self, config): - """Change the CKAN (Pylons) environment configuration. - - See IConfigurer. - - """ p.toolkit.add_template_directory(config, "../templates") - p.toolkit.add_resource('../assets', 'ckanext-googleanalytics') + p.toolkit.add_resource("../assets", "ckanext-googleanalytics") - def get_helpers(self): - """Return the CKAN 2.0 template helper functions this plugin provides. + # IPackageController (CKAN <= 2.9) + # IResourceController (CKAN <= 2.9) + def after_delete(self, context, data_dict): + # Make sure to delete package/resource visists when the corresponding package/resource is deleted + pass - See ITemplateHelpers. + # IPackageController (CKAN > 2.9) + def after_dataset_delete(self, context, data_dict): + # Make sure to delete package visists when the corresponding package is deleted + pass - """ + # IResourceController (CKAN > 2.9) + def after_resource_delete(self, context, data_dict): + # Make sure to delete resource visists when the corresponding resource is deleted + pass + + # ITemplateHelpers + def get_helpers(self): return { "googleanalytics_header": self.googleanalytics_header, - 'get_package_stats': googleanalytics_helpers.get_package_stats, - 'get_resource_stats': googleanalytics_helpers.get_resource_stats + "get_package_stats": googleanalytics_helpers.get_package_stats, + "get_resource_stats": googleanalytics_helpers.get_resource_stats, + "get_url_stats": googleanalytics_helpers.get_url_stats } def googleanalytics_header(self): @@ -156,7 +165,6 @@ def googleanalytics_header(self): if self.enable_user_id and current_user: self.googleanalytics_fields["userId"] = str(tk.c.userobj.id) - ## annonymize IP self.googleanalytics_fields["anonymize_ip"] = "true" data = { "googleanalytics_id": self.googleanalytics_id, From 575799a4e86aa9410e11d2162a59a2bf61487fa1 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 06:51:28 +0100 Subject: [PATCH 10/15] Deleted old plugin, view and cli. --- ckanext/googleanalytics/cli.py | 52 ----- ckanext/googleanalytics/plugin/__init__.py | 178 ------------------ .../googleanalytics/plugin/flask_plugin.py | 22 --- ckanext/googleanalytics/views.py | 132 ------------- 4 files changed, 384 deletions(-) delete mode 100644 ckanext/googleanalytics/cli.py delete mode 100644 ckanext/googleanalytics/plugin/__init__.py delete mode 100644 ckanext/googleanalytics/plugin/flask_plugin.py delete mode 100644 ckanext/googleanalytics/views.py diff --git a/ckanext/googleanalytics/cli.py b/ckanext/googleanalytics/cli.py deleted file mode 100644 index c227c1e..0000000 --- a/ckanext/googleanalytics/cli.py +++ /dev/null @@ -1,52 +0,0 @@ -# encoding: utf-8 -import logging -import click - -import ckan.plugins.toolkit as tk -import ckan.model as model -from .utils import ( - db as db_utils, - ga as ga_utils -) - - -log = logging.getLogger(__name__) - - -def get_commands(): - return [googleanalytics] - - -@click.group(short_help=u"GoogleAnalytics commands") -def googleanalytics(): - pass - - -@googleanalytics.command() -def init(): - """Initialise the local stats database tables""" - model.Session.remove() - model.Session.configure(bind=model.meta.engine) - db_utils.init_tables() - log.info("Set up statistics tables in main database") - - -@googleanalytics.command(short_help=u"Load data from Google Analytics API") -@click.argument("credentials", type=click.Path(exists=True)) -@click.option("-s", "--start-date", required=False) -def load(credentials, start_date): - """Parse data from Google Analytics API and store it - in a local database - """ - service = ga_utils.init_service(credentials) - - # Get and save resource and dataset download information - packages_data = ga_utils.get_packages_data(service) - ga_utils.save_packages_data(packages_data) - - # Get and save url views information - urls_data = ga_utils.get_urls_data(service) - ga_utils.save_urls_data(urls_data) - - log.info("Saved %s packages visits from google" % len(packages_data)) - log.info("Saved %s urls visits from google" % len(urls_data)) diff --git a/ckanext/googleanalytics/plugin/__init__.py b/ckanext/googleanalytics/plugin/__init__.py deleted file mode 100644 index 410b50b..0000000 --- a/ckanext/googleanalytics/plugin/__init__.py +++ /dev/null @@ -1,178 +0,0 @@ -# -*- coding: utf-8 -*- - -import threading -import requests -import logging -import json -import ast - -from ckanext.googleanalytics.actions import resource_stat , package_stat, url_stat, download_package_stat -from ckanext.googleanalytics.plugin.flask_plugin import GAMixinPlugin -import ckanext.googleanalytics.helpers as googleanalytics_helpers -from six.moves.urllib.parse import urlencode -import ckan.plugins.toolkit as tk -import ckan.lib.helpers as h -import ckan.plugins as p - - -DEFAULT_RESOURCE_URL_TAG = "/downloads/" - -log = logging.getLogger(__name__) - - -class GoogleAnalyticsException(Exception): - pass - - -class AnalyticsPostThread(threading.Thread): - """Threaded Url POST""" - - def __init__(self, queue): - threading.Thread.__init__(self) - self.queue = queue - - def run(self): - while True: - data = self.queue.get() - log.debug("Sending API event to Google Analytics: GA4") - measure_id = tk.config.get("googleanalytics.measurement_id") - api_secret = tk.config.get("googleanalytics.api_secret") - res = requests.post( - "https://www.google-analytics.com/mp/collect?measurement_id={}&api_secret={}".format(measure_id, api_secret), - data=json.dumps(data), - timeout=10, - ) - self.queue.task_done() - - -class GoogleAnalyticsPlugin(GAMixinPlugin, p.SingletonPlugin): - p.implements(p.IActions) - p.implements(p.IConfigurable) - p.implements(p.IConfigurer, inherit=True) - p.implements(p.IPackageController, inherit=True) - p.implements(p.IResourceController, inherit=True) - p.implements(p.ITemplateHelpers) - - # IActions - def get_actions(self): - return { - "resource_stats": resource_stat, - "package_stats": package_stat, - "url_stats": url_stat, - "download_package_stats": download_package_stat - } - - # IConfigurable - def configure(self, config): - if "googleanalytics.measurement_id" not in config: - msg = "Missing googleanalytics.measurement_id in config. One must be set." - raise GoogleAnalyticsException(msg) - # TODO: Do we still need to submit `gogleanalytics_id` separately? - self.googleanalytics_id = config.get('googleanalytics.measurement_id') - self.googleanalytics_domain = config.get( - "googleanalytics.domain", "auto" - ) - self.googleanalytics_fields = ast.literal_eval( - config.get("googleanalytics.fields", "{}") - ) - - googleanalytics_linked_domains = config.get( - "googleanalytics.linked_domains", "" - ) - self.googleanalytics_linked_domains = [ - x.strip() for x in googleanalytics_linked_domains.split(",") if x - ] - - if self.googleanalytics_linked_domains: - self.googleanalytics_fields["allowLinker"] = "true" - - # If resource_prefix is not in config file then write the default value - # to the config dict, otherwise templates seem to get 'true' when they - # try to read resource_prefix from config. - if "googleanalytics_resource_prefix" not in config: - config[ - "googleanalytics_resource_prefix" - ] = DEFAULT_RESOURCE_URL_TAG - self.googleanalytics_resource_prefix = config[ - "googleanalytics_resource_prefix" - ] - - self.show_downloads = tk.asbool( - config.get("googleanalytics.show_downloads", True) - ) - self.track_events = tk.asbool( - config.get("googleanalytics.track_events", False) - ) - self.enable_user_id = tk.asbool( - config.get("googleanalytics.enable_user_id", False) - ) - - self.googleanalytics_measurment_id = config.get( - "googleanalytics.measurement_id", "" - ) - - # p.toolkit.add_resource("../assets", "ckanext-googleanalytics") - - # spawn a pool of 5 threads, and pass them queue instance - for i in range(5): - t = AnalyticsPostThread(self.analytics_queue) - t.setDaemon(True) - t.start() - - # IConfigurer - def update_config(self, config): - p.toolkit.add_template_directory(config, "../templates") - p.toolkit.add_resource("../assets", "ckanext-googleanalytics") - - # IPackageController (CKAN <= 2.9) - # IResourceController (CKAN <= 2.9) - def after_delete(self, context, data_dict): - # Make sure to delete package/resource visists when the corresponding package/resource is deleted - pass - - # IPackageController (CKAN > 2.9) - def after_dataset_delete(self, context, data_dict): - # Make sure to delete package visists when the corresponding package is deleted - pass - - # IResourceController (CKAN > 2.9) - def after_resource_delete(self, context, data_dict): - # Make sure to delete resource visists when the corresponding resource is deleted - pass - - # ITemplateHelpers - def get_helpers(self): - return { - "googleanalytics_header": self.googleanalytics_header, - "get_package_stats": googleanalytics_helpers.get_package_stats, - "get_resource_stats": googleanalytics_helpers.get_resource_stats, - "get_url_stats": googleanalytics_helpers.get_url_stats - } - - def googleanalytics_header(self): - """Render the googleanalytics_header snippet for CKAN 2.0 templates. - - This is a template helper function that renders the - googleanalytics_header jinja snippet. To be called from the jinja - templates in this extension, see ITemplateHelpers. - - """ - try: - current_user = tk.c.user - except AttributeError: - current_user = False - - if self.enable_user_id and current_user: - self.googleanalytics_fields["userId"] = str(tk.c.userobj.id) - - self.googleanalytics_fields["anonymize_ip"] = "true" - data = { - "googleanalytics_id": self.googleanalytics_id, - "googleanalytics_domain": self.googleanalytics_domain, - "googleanalytics_fields": str(self.googleanalytics_fields), - "googleanalytics_linked_domains": self.googleanalytics_linked_domains, - "googleanalytics_measurement_id": self.googleanalytics_measurment_id - } - return p.toolkit.render_snippet( - "googleanalytics/snippets/googleanalytics_header.html", data - ) diff --git a/ckanext/googleanalytics/plugin/flask_plugin.py b/ckanext/googleanalytics/plugin/flask_plugin.py deleted file mode 100644 index 607b178..0000000 --- a/ckanext/googleanalytics/plugin/flask_plugin.py +++ /dev/null @@ -1,22 +0,0 @@ -# -*- coding: utf-8 -*- -import queue - -import ckan.plugins as plugins - -from ckanext.googleanalytics.views import ga -from ckanext.googleanalytics.cli import get_commands - - -class GAMixinPlugin(plugins.SingletonPlugin): - plugins.implements(plugins.IBlueprint) - plugins.implements(plugins.IClick) - - analytics_queue = queue.Queue() - - # IBlueprint - def get_blueprint(self): - return [ga] - - # IClick - def get_commands(self): - return get_commands() diff --git a/ckanext/googleanalytics/views.py b/ckanext/googleanalytics/views.py deleted file mode 100644 index 7abacd6..0000000 --- a/ckanext/googleanalytics/views.py +++ /dev/null @@ -1,132 +0,0 @@ -# -*- coding: utf-8 -*- - -import hashlib -import logging -import six - -from werkzeug.utils import import_string, ImportStringError -import ckan.views.resource as resource -import ckan.plugins.toolkit as tk -import ckan.views.api as api -from flask import Blueprint -import ckan.logic as logic -import ckan.model as model -from ckan.common import g - - -CONFIG_HANDLER_PATH = "googleanalytics.download_handler" - -log = logging.getLogger(__name__) -ga = Blueprint("google_analytics", "google_analytics") - - -def action(logic_function, ver=api.API_MAX_VERSION): - try: - function = logic.get_action(logic_function) - side_effect_free = getattr(function, "side_effect_free", False) - request_data = api._get_request_data(try_url_params=side_effect_free) - if isinstance(request_data, dict): - id = request_data.get("id", "") - if "q" in request_data: - id = request_data["q"] - if "query" in request_data: - id = request_data[u"query"] - _post_analytics(g.user) - except Exception as e: - log.debug(e) - pass - - return api.action(logic_function, ver) - - -ga.add_url_rule( - "/api/action/", - methods=["GET", "POST"], - view_func=action, -) -ga.add_url_rule( - u"/api//action/".format( - api.API_MAX_VERSION - ), - methods=["GET", "POST"], - view_func=action, -) -ga.add_url_rule( - u"//action/".format( - api.API_MAX_VERSION - ), - methods=["GET", "POST"], - view_func=action, -) - - -def download(id, resource_id, filename=None, package_type="dataset"): - handler_path = tk.config.get("googleanalytics.download_handler") - using_default_handler = False - - if handler_path: - try: - download_handler = import_string(handler_path) - except (ImportError, ImportStringError) as e: - log.debug("`download_handler` configured but not found") - raise e - else: - log.debug("`download_handler` not configured, using CKAN's default which is: resource.download") - download_handler = resource.download - using_default_handler = True - - try: - _post_analytics(g.user) - except Exception as e: - log.error(e) - - if using_default_handler: - return download_handler( - package_type= 'dataset', - id=id, - resource_id=resource_id, - filename=filename, - ) - else: - return download_handler( - id=id, - resource_id=resource_id, - filename=filename, - ) - - -ga.add_url_rule( - "/dataset//resource//download", view_func=download -) -ga.add_url_rule( - "/dataset//resource//download/", - view_func=download, -) - - -def _post_analytics(user): - from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin - - path = tk.request.environ["PATH_INFO"] - path_id = path.split("/dataset/")[1].split("/")[0] - context = { - u'model': model, - u'session': model.Session, - u'user': user - } - package = tk.get_action("package_show")(context, {"id": path_id}) - referer_link = "/dataset/{}".format(package.get("name")) - - resource_data = { - "client_id": hashlib.md5(six.ensure_binary(tk.c.user)).hexdigest(), - "events": [ - { - "name": "file_download", - "params" : { - "link_url": referer_link - } - } - ] - } - - GoogleAnalyticsPlugin.analytics_queue.put(resource_data) From c9f10290f62dc81aa539df052614b761f537a78d Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 06:52:12 +0100 Subject: [PATCH 11/15] New refactored extension. --- ckanext/googleanalytics/actions.py | 38 ++--- .../googleanalytics/blueprints/__init__.py | 5 + .../blueprints/googleanalytics_blueprint.py | 111 +++++++++++++ ckanext/googleanalytics/command.py | 48 ++++++ ckanext/googleanalytics/logic.py | 80 +++++++++ ckanext/googleanalytics/model.py | 56 +++++++ ckanext/googleanalytics/plugin.py | 156 ++++++++++++++++++ ckanext/googleanalytics/utils/db.py | 65 +------- ckanext/googleanalytics/utils/ga.py | 132 +++++++-------- 9 files changed, 546 insertions(+), 145 deletions(-) create mode 100644 ckanext/googleanalytics/blueprints/__init__.py create mode 100644 ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py create mode 100644 ckanext/googleanalytics/command.py create mode 100644 ckanext/googleanalytics/logic.py create mode 100644 ckanext/googleanalytics/model.py create mode 100644 ckanext/googleanalytics/plugin.py diff --git a/ckanext/googleanalytics/actions.py b/ckanext/googleanalytics/actions.py index 6450094..72c1078 100644 --- a/ckanext/googleanalytics/actions.py +++ b/ckanext/googleanalytics/actions.py @@ -2,40 +2,42 @@ import json from ckan.plugins import toolkit -from .utils import ( - db as db_utils, - ga as ga_utils -) +from ckanext.googleanalytics.model import ( + get_package_stat, + get_resource_stat, + get_url_stat +) +from ckanext.googleanalytics.logic import load_package_stats log = logging.getLogger(__name__) @toolkit.side_effect_free -def resource_stat(context, data_dict): +def package_stat(context, data_dict): ''' - Fetch resource stats + Fetch package stats ''' - resource_id = data_dict['resource_id'] + package_id = data_dict['package_id'] result = 0 try: - result = db_utils.get_resource_stat(resource_id)[0] + result = get_package_stat(package_id)[0] except Exception as e: - log.error("Resource not found: {}".format(e)) + log.error("Package not found: {}".format(e)) return json.dumps(result) @toolkit.side_effect_free -def package_stat(context, data_dict): +def resource_stat(context, data_dict): ''' - Fetch package stats + Fetch resource stats ''' - package_id = data_dict['package_id'] + resource_id = data_dict['resource_id'] result = 0 try: - result = db_utils.get_package_stat(package_id)[0] + result = get_resource_stat(resource_id)[0] except Exception as e: - log.error("Package not found: {}".format(e)) + log.error("Resource not found: {}".format(e)) return json.dumps(result) @@ -47,7 +49,7 @@ def url_stat(context, data_dict): url_id = data_dict['url_id'] result = 0 try: - result = db_utils.get_url_stat(url_id)[0] + result = get_url_stat(url_id)[0] except Exception as e: log.error("URL not found: {}".format(e)) return json.dumps(result) @@ -57,10 +59,8 @@ def download_package_stat(context, data_dict): ''' Download package stats from Google analytics into the local database ''' - credentials_path = data_dict['credentials_path'] - service = ga_utils.init_service(credentials_path) - packages_data = ga_utils.get_packages_data(service) - ga_utils.save_packages_data(packages_data) + credentials = data_dict['credentials_path'] + packages_data = ckanext.googleanalytics(credentials) return json.dumps({ 'package_count': len(packages_data) }) diff --git a/ckanext/googleanalytics/blueprints/__init__.py b/ckanext/googleanalytics/blueprints/__init__.py new file mode 100644 index 0000000..f834f22 --- /dev/null +++ b/ckanext/googleanalytics/blueprints/__init__.py @@ -0,0 +1,5 @@ +from ckanext.googleanalytics.blueprints.googleanalytics_blueprint import googleanalytics_blueprint + +blueprints = [ + googleanalytics_blueprint, +] diff --git a/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py b/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py new file mode 100644 index 0000000..856ad78 --- /dev/null +++ b/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py @@ -0,0 +1,111 @@ +import hashlib +import logging +import six + +from ckan.common import g +from flask import Blueprint +from werkzeug.utils import import_string, ImportStringError +import ckan.logic as logic +import ckan.model as model +import ckan.plugins.toolkit as toolkit +import ckan.views.api as api +import ckan.views.resource as resource + +from ckanext.googleanalytics.logic import post_analytics + + +log = logging.getLogger(__name__) + + +googleanalytics_blueprint = Blueprint( + u'google_analytics', + __name__ +) + + +CONFIG_HANDLER_PATH = "googleanalytics.download_handler" + + +def action(logic_function, ver=api.API_MAX_VERSION): + try: + function = logic.get_action(logic_function) + side_effect_free = getattr(function, "side_effect_free", False) + request_data = api._get_request_data(try_url_params=side_effect_free) + if isinstance(request_data, dict): + id = request_data.get("id", "") + if "q" in request_data: + id = request_data["q"] + if "query" in request_data: + id = request_data[u"query"] + post_analytics(g.user) + except Exception as e: + log.debug(e) + pass + + return api.action(logic_function, ver) + + +googleanalytics_blueprint.add_url_rule( + "/api/action/", + methods=["GET", "POST"], + view_func=action, +) +googleanalytics_blueprint.add_url_rule( + u"/api//action/".format( + api.API_MAX_VERSION + ), + methods=["GET", "POST"], + view_func=action, +) +googleanalytics_blueprint.add_url_rule( + u"//action/".format( + api.API_MAX_VERSION + ), + methods=["GET", "POST"], + view_func=action, +) + + +def download(id, resource_id, filename=None, package_type="dataset"): + handler_path = toolkit.config.get("googleanalytics.download_handler") + using_default_handler = False + + if handler_path: + try: + download_handler = import_string(handler_path) + except (ImportError, ImportStringError) as e: + log.debug("`download_handler` configured but not found") + raise e + else: + log.debug("`download_handler` not configured, using CKAN's default which is: resource.download") + download_handler = resource.download + using_default_handler = True + + try: + post_analytics(g.user) + except Exception as e: + log.error(e) + + if using_default_handler: + return download_handler( + package_type= 'dataset', + id=id, + resource_id=resource_id, + filename=filename, + ) + else: + return download_handler( + id=id, + resource_id=resource_id, + filename=filename, + ) + + +googleanalytics_blueprint.add_url_rule( + "/dataset//resource//download", + view_func=download +) +googleanalytics_blueprint.add_url_rule( + "/dataset//resource//download/", + view_func=download, +) diff --git a/ckanext/googleanalytics/command.py b/ckanext/googleanalytics/command.py new file mode 100644 index 0000000..ff491b1 --- /dev/null +++ b/ckanext/googleanalytics/command.py @@ -0,0 +1,48 @@ +import logging +import click + +import ckan.plugins.toolkit as tk +import ckan.model as model +from ckanext.googleanalytics.model import init_tables +from ckanext.googleanalytics.logic import ( + load_package_stats, + load_url_stats +) + + +log = logging.getLogger(__name__) + + +def get_commands(): + return [googleanalytics] + + +@click.group(short_help=u"GoogleAnalytics commands") +def googleanalytics(): + pass + + +@googleanalytics.command() +def init(): + """Initialise the local stats database tables""" + model.Session.remove() + model.Session.configure(bind=model.meta.engine) + init_tables() + log.info("Set up statistics tables in main database") + + +@googleanalytics.command(short_help=u"Load data from Google Analytics API") +@click.argument("credentials", type=click.Path(exists=True)) +@click.option("-s", "--start-date", required=False) +def load(credentials, start_date): + """Parse data from Google Analytics API and store it + in a local database + """ + # Fetch package and resource download stats from GA and save them locally + packages_data = load_package_stats(credentials) + + # Fetch url stats from GA and save them locally + urls_data = load_url_stats(credentials) + + log.info("Saved %s packages visits from google" % len(packages_data)) + log.info("Saved %s urls visits from google" % len(urls_data)) diff --git a/ckanext/googleanalytics/logic.py b/ckanext/googleanalytics/logic.py new file mode 100644 index 0000000..c597efc --- /dev/null +++ b/ckanext/googleanalytics/logic.py @@ -0,0 +1,80 @@ +import hashlib +import json +import logging +import requests +import threading + +import ckan.plugins.toolkit as toolkit +import ckan.model as model + +from ckanext.googleanalytics.utils.ga import ( + _init_service, + _get_packages_data, + _save_packages_data, + _get_urls_data, + _save_urls_data +) + + +log = logging.getLogger(__name__) + + +class AnalyticsPostThread(threading.Thread): + def __init__(self, queue): + threading.Thread.__init__(self) + self.queue = queue + + def run(self): + while True: + data = self.queue.get() + log.debug("Sending API event to Google Analytics: GA4") + measurement_id = toolkit.config.get("googleanalytics.measurement_id") + api_secret = toolkit.config.get("googleanalytics.api_secret") + res = requests.post( + "https://www.google-analytics.com/mp/collect?measurement_id={}&api_secret={}".format(measurement_id, api_secret), + data=json.dumps(data), + timeout=10, + ) + self.queue.task_done() + + +def post_analytics(user): + from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin + + path = toolkit.request.environ["PATH_INFO"] + path_id = path.split("/dataset/")[1].split("/")[0] + context = { + u"model": model, + u"session": model.Session, + u"user": user + } + package = toolkit.get_action("package_show")(context, {"id": path_id}) + referer_link = "/dataset/{}".format(package.get("name")) + + resource_data = { + "client_id": hashlib.md5(six.ensure_binary(toolkit.c.user)).hexdigest(), + "events": [ + { + "name": "file_download", + "params" : { + "link_url": referer_link + } + } + ] + } + + GoogleAnalyticsPlugin.analytics_queue.put(resource_data) + + +def load_package_stats(credentials): + service = _init_service(credentials) + packages_data = _get_packages_data(service) + _save_packages_data(packages_data) + return packages_data + + +def load_url_stats(credentials): + service = _init_service(credentials) + urls_data = _get_urls_data(service) + _save_urls_data(urls_data) + return urls_data diff --git a/ckanext/googleanalytics/model.py b/ckanext/googleanalytics/model.py new file mode 100644 index 0000000..3f3bbe9 --- /dev/null +++ b/ckanext/googleanalytics/model.py @@ -0,0 +1,56 @@ +from sqlalchemy import Table, Column, Integer, String, MetaData + +import ckan.model as model +from ckan.lib.base import * + +from ckanext.googleanalytics.utils.db import _update_visits, _get_visits + + +def init_tables(): + metadata = MetaData() + package_stats = Table( + "package_stats", + metadata, + Column("package_id", String(60), primary_key=True), + Column("visits_recently", Integer), + Column("visits_ever", Integer), + ) + resource_stats = Table( + "resource_stats", + metadata, + Column("resource_id", String(60), primary_key=True), + Column("visits_recently", Integer), + Column("visits_ever", Integer), + ) + url_stats = Table( + "url_stats", + metadata, + Column("url_id", String(512), primary_key=True), + Column("visits_recently", Integer), + Column("visits_ever", Integer), + ) + metadata.create_all(model.meta.engine) + + +def update_package_visits(package_id, recently, ever): + return _update_visits("package_stats", package_id, recently, ever) + + +def update_resource_visits(resource_id, recently, ever): + return _update_visits("resource_stats", resource_id, recently, ever) + + +def update_url_visits(url_id, recently, ever): + return _update_visits("url_stats", url_id, recently, ever) + + +def get_package_stat(package_id): + return _get_visits("package_stats", package_id) + + +def get_resource_stat(resource_id): + return _get_visits("resource_stats", resource_id) + + +def get_url_stat(url_id): + return _get_visits("url_stats", url_id) diff --git a/ckanext/googleanalytics/plugin.py b/ckanext/googleanalytics/plugin.py new file mode 100644 index 0000000..beff47e --- /dev/null +++ b/ckanext/googleanalytics/plugin.py @@ -0,0 +1,156 @@ +import queue +import ast + +from ckanext.googleanalytics.blueprints import blueprints +from ckanext.googleanalytics.command import get_commands +from ckanext.googleanalytics.logic import AnalyticsPostThread +import ckan.plugins as plugins +import ckan.plugins.toolkit as toolkit +import ckanext.googleanalytics.actions as ga_actions +import ckanext.googleanalytics.helpers as ga_helpers + +DEFAULT_RESOURCE_URL_TAG = "/downloads/" + + +class GoogleAnalyticsPlugin(plugins.SingletonPlugin): + plugins.implements(plugins.IActions) + plugins.implements(plugins.IBlueprint) + plugins.implements(plugins.IClick) + plugins.implements(plugins.IConfigurable) + plugins.implements(plugins.IConfigurer, inherit=True) + plugins.implements(plugins.IPackageController, inherit=True) + plugins.implements(plugins.IResourceController, inherit=True) + plugins.implements(plugins.ITemplateHelpers) + + analytics_queue = queue.Queue() + + # IActions + def get_actions(self): + return { + "resource_stats": ga_actions.resource_stat, + "package_stats": ga_actions.package_stat, + "url_stats": ga_actions.url_stat, + "download_package_stats": ga_actions.download_package_stat + } + + # IBlueprint + def get_blueprint(self): + return blueprints + + # IClick + def get_commands(self): + return get_commands() + + # IConfigurable + def configure(self, config): + if "googleanalytics.measurement_id" not in config: + raise KeyError( + "Missing googleanalytics.measurement_id in config. One must be set." + ) + # TODO: Do we still need to submit `gogleanalytics_id` separately? + self.googleanalytics_id = config.get('googleanalytics.measurement_id') + self.googleanalytics_domain = config.get( + "googleanalytics.domain", "auto" + ) + self.googleanalytics_fields = ast.literal_eval( + config.get("googleanalytics.fields", "{}") + ) + + googleanalytics_linked_domains = config.get( + "googleanalytics.linked_domains", "" + ) + self.googleanalytics_linked_domains = [ + x.strip() for x in googleanalytics_linked_domains.split(",") if x + ] + + if self.googleanalytics_linked_domains: + self.googleanalytics_fields["allowLinker"] = "true" + + # If resource_prefix is not in config file then write the default value + # to the config dict, otherwise templates seem to get 'true' when they + # try to read resource_prefix from config. + if "googleanalytics_resource_prefix" not in config: + config[ + "googleanalytics_resource_prefix" + ] = DEFAULT_RESOURCE_URL_TAG + self.googleanalytics_resource_prefix = config[ + "googleanalytics_resource_prefix" + ] + + self.show_downloads = toolkit.asbool( + config.get("googleanalytics.show_downloads", True) + ) + self.track_events = toolkit.asbool( + config.get("googleanalytics.track_events", False) + ) + self.enable_user_id = toolkit.asbool( + config.get("googleanalytics.enable_user_id", False) + ) + + self.googleanalytics_measurment_id = config.get( + "googleanalytics.measurement_id", "" + ) + + # spawn a pool of 5 threads, and pass them queue instance + for i in range(5): + t = AnalyticsPostThread(self.analytics_queue) + t.setDaemon(True) + t.start() + + # IConfigurer + def update_config(self, config): + plugins.toolkit.add_template_directory(config, "../templates") + plugins.toolkit.add_resource("../assets", "ckanext-googleanalytics") + + # IPackageController (CKAN <= 2.9) + # IResourceController (CKAN <= 2.9) + def after_delete(self, context, data_dict): + # Make sure to delete package/resource visists when the corresponding package/resource is deleted + pass + + # IPackageController (CKAN > 2.9) + def after_dataset_delete(self, context, data_dict): + # Make sure to delete package visists when the corresponding package is deleted + pass + + # IResourceController (CKAN > 2.9) + def after_resource_delete(self, context, data_dict): + # Make sure to delete resource visists when the corresponding resource is deleted + pass + + # ITemplateHelpers + def get_helpers(self): + return { + "googleanalytics_header": self._googleanalytics_header, + "get_package_stats": ga_helpers.get_package_stats, + "get_resource_stats": ga_helpers.get_resource_stats, + "get_url_stats": ga_helpers.get_url_stats + } + + def _googleanalytics_header(self): + """Render the googleanalytics_header snippet. + + This is a template helper function that renders the + googleanalytics_header jinja snippet. To be called from the jinja + templates in this extension, see ITemplateHelpers. + + """ + try: + current_user = toolkit.c.user + except AttributeError: + current_user = False + + if self.enable_user_id and current_user: + self.googleanalytics_fields["userId"] = str(toolkit.c.userobj.id) + + self.googleanalytics_fields["anonymize_ip"] = "true" + data = { + "googleanalytics_id": self.googleanalytics_id, + "googleanalytics_domain": self.googleanalytics_domain, + "googleanalytics_fields": str(self.googleanalytics_fields), + "googleanalytics_linked_domains": self.googleanalytics_linked_domains, + "googleanalytics_measurement_id": self.googleanalytics_measurment_id + } + return plugins.toolkit.render_snippet( + "googleanalytics/snippets/googleanalytics_header.html", data + ) diff --git a/ckanext/googleanalytics/utils/db.py b/ckanext/googleanalytics/utils/db.py index 6058545..94a9a19 100644 --- a/ckanext/googleanalytics/utils/db.py +++ b/ckanext/googleanalytics/utils/db.py @@ -1,6 +1,4 @@ -import logging - -from sqlalchemy import Table, Column, Integer, String, MetaData +from sqlalchemy import MetaData from sqlalchemy.sql import select, text from sqlalchemy import func @@ -8,37 +6,10 @@ from ckan.lib.base import * -log = logging.getLogger(__name__) cached_tables = {} -def init_tables(): - metadata = MetaData() - package_stats = Table( - "package_stats", - metadata, - Column("package_id", String(60), primary_key=True), - Column("visits_recently", Integer), - Column("visits_ever", Integer), - ) - resource_stats = Table( - "resource_stats", - metadata, - Column("resource_id", String(60), primary_key=True), - Column("visits_recently", Integer), - Column("visits_ever", Integer), - ) - url_stats = Table( - "url_stats", - metadata, - Column("url_id", String(512), primary_key=True), - Column("visits_recently", Integer), - Column("visits_ever", Integer), - ) - metadata.create_all(model.meta.engine) - - -def get_table(name): +def _get_table(name): if name not in cached_tables: meta = MetaData() meta.reflect(bind=model.meta.engine) @@ -48,7 +19,7 @@ def get_table(name): def _update_visits(table_name, item_id, recently, ever): - stats = get_table(table_name) + stats = _get_table(table_name) id_col_name = "%s_id" % table_name[: -len("_stats")] id_col = getattr(stats.c, id_col_name) s = select([func.count(id_col)], id_col == item_id) @@ -67,23 +38,13 @@ def _update_visits(table_name, item_id, recently, ever): "visits_ever": ever, } connection.execute(stats.insert().values(**values)) + + model.Session.commit() -def update_package_visits(package_id, recently, ever): - return _update_visits("package_stats", package_id, recently, ever) - - -def update_resource_visits(resource_id, recently, ever): - return _update_visits("resource_stats", resource_id, recently, ever) - - -def update_url_visits(url_id, recently, ever): - return _update_visits("url_stats", url_id, recently, ever) - - -def _get_stats(table_name, item_id): +def _get_visits(table_name, item_id): connection = model.Session.connection() - stats = get_table(table_name) + stats = _get_table(table_name) id_col_name = "%s_id" % table_name[: -len("_stats")] id_col = getattr(stats.c, id_col_name) s = select( @@ -91,15 +52,3 @@ def _get_stats(table_name, item_id): ).where(id_col == item_id) res = connection.execute(s).fetchone() return res and res or [0] - - -def get_package_stat(package_id): - return _get_stats("package_stats", package_id) - - -def get_resource_stat(resource_id): - return _get_stats("resource_stats", resource_id) - - -def get_url_stat(url_id): - return _get_stats("url_stats", url_id) diff --git a/ckanext/googleanalytics/utils/ga.py b/ckanext/googleanalytics/utils/ga.py index 68d0506..ac97ccb 100644 --- a/ckanext/googleanalytics/utils/ga.py +++ b/ckanext/googleanalytics/utils/ga.py @@ -11,7 +11,7 @@ RESOURCE_URL_REGEX, PACKAGE_URL, _recent_view_days, - db as db_utils + db ) @@ -19,7 +19,7 @@ log = logging.getLogger(__name__) -def init_service(credentials_path): +def _init_service(credentials_path): scopes = ["https://www.googleapis.com/auth/analytics.readonly"] credentials = ServiceAccountCredentials.from_json_keyfile_name(credentials_path, scopes) http = httplib2.Http() @@ -29,65 +29,7 @@ def init_service(credentials_path): return service -def get_urls_data(service): - urls = {} - property_id = tk.config.get("googleanalytics.property_id") - dates = { - "recent": {"startDate": "{}daysAgo".format(_recent_view_days()), "endDate": "today"}, - "ever": {"startDate": "2024-01-01", "endDate": "today"} - } - - for date_name, date in dates.items(): - request_body = { - "requests": [{ - "dateRanges": [date], - "metrics": [{"name": "eventCount"}], - "dimensions": [{"name": "eventName"}, {"name": "pagePath"}] - }] - } - - response = service.properties().batchRunReports( - body=request_body, property='properties/{}'.format(property_id) - ).execute() - - for report in response.get('reports', []): - for row in report.get('rows', []): - event_category = row['dimensionValues'][0].get('value', '') - event_label = row['dimensionValues'][1].get('value', '') - event_count = row['metricValues'][0].get('value', 0) - - if event_category == "page_view": - url = event_label - views = event_count - count = 0 - if url in urls and date_name in urls[url]: - count += urls[url][date_name] - urls.setdefault(url, {})[date_name] = int(views) + count - - return urls - - -def save_urls_data(urls_data): - """Save tuples of urls_data to the database""" - urls = {} - for url_id, visits in urls_data.items(): - if url_id in urls: - urls[url_id]["recent"] += visits.get("recent", 0) - urls[url_id]["ever"] += visits.get("ever", 0) - else: - urls[url_id] = { - "recent": visits.get("recent", 0), - "ever": visits.get("ever", 0) - } - - for url_id, visits in urls.items(): - db_utils.update_url_visits(url_id, visits["recent"], visits["ever"]) - log.info("Updated URL path %s with %s visits" % (url_id, visits)) - - model.Session.commit() - - -def get_packages_data(service): +def _get_packages_data(service): packages = {} property_id = tk.config.get("googleanalytics.property_id") dates = { @@ -128,15 +70,15 @@ def get_packages_data(service): return packages -def save_packages_data(packages_data): +def _save_packages_data(packages_data): """Save tuples of packages_data to the database""" - def save_resource(resource_id, visits): - db_utils.update_resource_visits(resource_id, visits["recent"], visits["ever"]) - log.info("Updated resource %s with %s visits" % (resource.id, visits)) - def save_package(package_id, visits): - db_utils.update_package_visits(package_id, visits["recent"], visits["ever"]) + db._update_visits("package_stats", package_id, visits["recent"], visits["ever"]) log.info("Updated package %s with %s visits" % (package_id, visits)) + + def save_resource(resource_id, visits): + db._update_visits("resource_stats", resource_id, visits["recent"], visits["ever"]) + log.info("Updated resource %s with %s visits" % (resource.id, visits)) packages = {} for identifier, visits in packages_data.items(): @@ -176,4 +118,58 @@ def save_package(package_id, visits): for package_id, visits in packages.items(): save_package(package_id, visits) - model.Session.commit() + +def _get_urls_data(service): + urls = {} + property_id = tk.config.get("googleanalytics.property_id") + dates = { + "recent": {"startDate": "{}daysAgo".format(_recent_view_days()), "endDate": "today"}, + "ever": {"startDate": "2024-01-01", "endDate": "today"} + } + + for date_name, date in dates.items(): + request_body = { + "requests": [{ + "dateRanges": [date], + "metrics": [{"name": "eventCount"}], + "dimensions": [{"name": "eventName"}, {"name": "pagePath"}] + }] + } + + response = service.properties().batchRunReports( + body=request_body, property='properties/{}'.format(property_id) + ).execute() + + for report in response.get('reports', []): + for row in report.get('rows', []): + event_category = row['dimensionValues'][0].get('value', '') + event_label = row['dimensionValues'][1].get('value', '') + event_count = row['metricValues'][0].get('value', 0) + + if event_category == "page_view": + url = event_label + views = event_count + count = 0 + if url in urls and date_name in urls[url]: + count += urls[url][date_name] + urls.setdefault(url, {})[date_name] = int(views) + count + + return urls + + +def _save_urls_data(urls_data): + """Save tuples of urls_data to the database""" + urls = {} + for url_id, visits in urls_data.items(): + if url_id in urls: + urls[url_id]["recent"] += visits.get("recent", 0) + urls[url_id]["ever"] += visits.get("ever", 0) + else: + urls[url_id] = { + "recent": visits.get("recent", 0), + "ever": visits.get("ever", 0) + } + + for url_id, visits in urls.items(): + db._update_visits("url_stats", url_id, visits["recent"], visits["ever"]) + log.info("Updated URL path %s with %s visits" % (url_id, visits)) From aabeaaa0f55f23e7bd4c11204398ff2daeaabb7a Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 07:16:45 +0100 Subject: [PATCH 12/15] Formatting using black. --- ckanext/googleanalytics/actions.py | 30 ++-- .../googleanalytics/blueprints/__init__.py | 4 +- .../blueprints/googleanalytics_blueprint.py | 24 ++- ckanext/googleanalytics/command.py | 13 +- ckanext/googleanalytics/helpers.py | 10 +- ckanext/googleanalytics/logic.py | 31 ++-- ckanext/googleanalytics/model.py | 2 +- ckanext/googleanalytics/plugin.py | 20 +-- ckanext/googleanalytics/utils/__init__.py | 12 +- ckanext/googleanalytics/utils/db.py | 6 +- ckanext/googleanalytics/utils/ga.py | 150 +++++++++++------- ckanext/googleanalytics/utils/numerize.py | 11 +- 12 files changed, 172 insertions(+), 141 deletions(-) diff --git a/ckanext/googleanalytics/actions.py b/ckanext/googleanalytics/actions.py index 72c1078..81d8b93 100644 --- a/ckanext/googleanalytics/actions.py +++ b/ckanext/googleanalytics/actions.py @@ -6,7 +6,7 @@ from ckanext.googleanalytics.model import ( get_package_stat, get_resource_stat, - get_url_stat + get_url_stat, ) from ckanext.googleanalytics.logic import load_package_stats @@ -15,10 +15,10 @@ @toolkit.side_effect_free def package_stat(context, data_dict): - ''' + """ Fetch package stats - ''' - package_id = data_dict['package_id'] + """ + package_id = data_dict["package_id"] result = 0 try: result = get_package_stat(package_id)[0] @@ -29,10 +29,10 @@ def package_stat(context, data_dict): @toolkit.side_effect_free def resource_stat(context, data_dict): - ''' + """ Fetch resource stats - ''' - resource_id = data_dict['resource_id'] + """ + resource_id = data_dict["resource_id"] result = 0 try: result = get_resource_stat(resource_id)[0] @@ -43,10 +43,10 @@ def resource_stat(context, data_dict): @toolkit.side_effect_free def url_stat(context, data_dict): - ''' + """ Fetch url stats - ''' - url_id = data_dict['url_id'] + """ + url_id = data_dict["url_id"] result = 0 try: result = get_url_stat(url_id)[0] @@ -56,11 +56,9 @@ def url_stat(context, data_dict): def download_package_stat(context, data_dict): - ''' + """ Download package stats from Google analytics into the local database - ''' - credentials = data_dict['credentials_path'] + """ + credentials = data_dict["credentials_path"] packages_data = ckanext.googleanalytics(credentials) - return json.dumps({ - 'package_count': len(packages_data) - }) + return json.dumps({"package_count": len(packages_data)}) diff --git a/ckanext/googleanalytics/blueprints/__init__.py b/ckanext/googleanalytics/blueprints/__init__.py index f834f22..8f2a39b 100644 --- a/ckanext/googleanalytics/blueprints/__init__.py +++ b/ckanext/googleanalytics/blueprints/__init__.py @@ -1,4 +1,6 @@ -from ckanext.googleanalytics.blueprints.googleanalytics_blueprint import googleanalytics_blueprint +from ckanext.googleanalytics.blueprints.googleanalytics_blueprint import ( + googleanalytics_blueprint, +) blueprints = [ googleanalytics_blueprint, diff --git a/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py b/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py index 856ad78..c1d8a09 100644 --- a/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py +++ b/ckanext/googleanalytics/blueprints/googleanalytics_blueprint.py @@ -17,10 +17,7 @@ log = logging.getLogger(__name__) -googleanalytics_blueprint = Blueprint( - u'google_analytics', - __name__ -) +googleanalytics_blueprint = Blueprint("google_analytics", __name__) CONFIG_HANDLER_PATH = "googleanalytics.download_handler" @@ -36,7 +33,7 @@ def action(logic_function, ver=api.API_MAX_VERSION): if "q" in request_data: id = request_data["q"] if "query" in request_data: - id = request_data[u"query"] + id = request_data["query"] post_analytics(g.user) except Exception as e: log.debug(e) @@ -51,14 +48,14 @@ def action(logic_function, ver=api.API_MAX_VERSION): view_func=action, ) googleanalytics_blueprint.add_url_rule( - u"/api//action/".format( + "/api//action/".format( api.API_MAX_VERSION ), methods=["GET", "POST"], view_func=action, ) googleanalytics_blueprint.add_url_rule( - u"//action/".format( + "//action/".format( api.API_MAX_VERSION ), methods=["GET", "POST"], @@ -69,7 +66,7 @@ def action(logic_function, ver=api.API_MAX_VERSION): def download(id, resource_id, filename=None, package_type="dataset"): handler_path = toolkit.config.get("googleanalytics.download_handler") using_default_handler = False - + if handler_path: try: download_handler = import_string(handler_path) @@ -77,7 +74,9 @@ def download(id, resource_id, filename=None, package_type="dataset"): log.debug("`download_handler` configured but not found") raise e else: - log.debug("`download_handler` not configured, using CKAN's default which is: resource.download") + log.debug( + "`download_handler` not configured, using CKAN's default which is: resource.download" + ) download_handler = resource.download using_default_handler = True @@ -85,10 +84,10 @@ def download(id, resource_id, filename=None, package_type="dataset"): post_analytics(g.user) except Exception as e: log.error(e) - + if using_default_handler: return download_handler( - package_type= 'dataset', + package_type="dataset", id=id, resource_id=resource_id, filename=filename, @@ -102,8 +101,7 @@ def download(id, resource_id, filename=None, package_type="dataset"): googleanalytics_blueprint.add_url_rule( - "/dataset//resource//download", - view_func=download + "/dataset//resource//download", view_func=download ) googleanalytics_blueprint.add_url_rule( "/dataset//resource//download/", diff --git a/ckanext/googleanalytics/command.py b/ckanext/googleanalytics/command.py index ff491b1..441b7e4 100644 --- a/ckanext/googleanalytics/command.py +++ b/ckanext/googleanalytics/command.py @@ -4,10 +4,7 @@ import ckan.plugins.toolkit as tk import ckan.model as model from ckanext.googleanalytics.model import init_tables -from ckanext.googleanalytics.logic import ( - load_package_stats, - load_url_stats -) +from ckanext.googleanalytics.logic import load_package_stats, load_url_stats log = logging.getLogger(__name__) @@ -17,7 +14,7 @@ def get_commands(): return [googleanalytics] -@click.group(short_help=u"GoogleAnalytics commands") +@click.group(short_help="GoogleAnalytics commands") def googleanalytics(): pass @@ -31,7 +28,7 @@ def init(): log.info("Set up statistics tables in main database") -@googleanalytics.command(short_help=u"Load data from Google Analytics API") +@googleanalytics.command(short_help="Load data from Google Analytics API") @click.argument("credentials", type=click.Path(exists=True)) @click.option("-s", "--start-date", required=False) def load(credentials, start_date): @@ -40,9 +37,9 @@ def load(credentials, start_date): """ # Fetch package and resource download stats from GA and save them locally packages_data = load_package_stats(credentials) - + # Fetch url stats from GA and save them locally urls_data = load_url_stats(credentials) - + log.info("Saved %s packages visits from google" % len(packages_data)) log.info("Saved %s urls visits from google" % len(urls_data)) diff --git a/ckanext/googleanalytics/helpers.py b/ckanext/googleanalytics/helpers.py index 5057531..bbf7913 100644 --- a/ckanext/googleanalytics/helpers.py +++ b/ckanext/googleanalytics/helpers.py @@ -3,15 +3,19 @@ def get_package_stats(package_id): - package_stat = toolkit.get_action('package_stats')({}, {'package_id': package_id}) + package_stat = toolkit.get_action("package_stats")( + {}, {"package_id": package_id} + ) return numerize(int(package_stat)) def get_resource_stats(resource_id): - resource_stat = toolkit.get_action('resource_stats')({}, {'resource_id': resource_id}) + resource_stat = toolkit.get_action("resource_stats")( + {}, {"resource_id": resource_id} + ) return numerize(int(resource_stat)) def get_url_stats(url_id): - url_stat = toolkit.get_action('url_stats')({}, {'url_id': url_id}) + url_stat = toolkit.get_action("url_stats")({}, {"url_id": url_id}) return numerize(int(url_stat)) diff --git a/ckanext/googleanalytics/logic.py b/ckanext/googleanalytics/logic.py index c597efc..4297234 100644 --- a/ckanext/googleanalytics/logic.py +++ b/ckanext/googleanalytics/logic.py @@ -12,7 +12,7 @@ _get_packages_data, _save_packages_data, _get_urls_data, - _save_urls_data + _save_urls_data, ) @@ -28,10 +28,14 @@ def run(self): while True: data = self.queue.get() log.debug("Sending API event to Google Analytics: GA4") - measurement_id = toolkit.config.get("googleanalytics.measurement_id") + measurement_id = toolkit.config.get( + "googleanalytics.measurement_id" + ) api_secret = toolkit.config.get("googleanalytics.api_secret") res = requests.post( - "https://www.google-analytics.com/mp/collect?measurement_id={}&api_secret={}".format(measurement_id, api_secret), + "https://www.google-analytics.com/mp/collect?measurement_id={}&api_secret={}".format( + measurement_id, api_secret + ), data=json.dumps(data), timeout=10, ) @@ -40,27 +44,20 @@ def run(self): def post_analytics(user): from ckanext.googleanalytics.plugin import GoogleAnalyticsPlugin - + path = toolkit.request.environ["PATH_INFO"] path_id = path.split("/dataset/")[1].split("/")[0] - context = { - u"model": model, - u"session": model.Session, - u"user": user - } + context = {"model": model, "session": model.Session, "user": user} package = toolkit.get_action("package_show")(context, {"id": path_id}) referer_link = "/dataset/{}".format(package.get("name")) resource_data = { - "client_id": hashlib.md5(six.ensure_binary(toolkit.c.user)).hexdigest(), + "client_id": hashlib.md5( + six.ensure_binary(toolkit.c.user) + ).hexdigest(), "events": [ - { - "name": "file_download", - "params" : { - "link_url": referer_link - } - } - ] + {"name": "file_download", "params": {"link_url": referer_link}} + ], } GoogleAnalyticsPlugin.analytics_queue.put(resource_data) diff --git a/ckanext/googleanalytics/model.py b/ckanext/googleanalytics/model.py index 3f3bbe9..1f7be76 100644 --- a/ckanext/googleanalytics/model.py +++ b/ckanext/googleanalytics/model.py @@ -43,7 +43,7 @@ def update_resource_visits(resource_id, recently, ever): def update_url_visits(url_id, recently, ever): return _update_visits("url_stats", url_id, recently, ever) - + def get_package_stat(package_id): return _get_visits("package_stats", package_id) diff --git a/ckanext/googleanalytics/plugin.py b/ckanext/googleanalytics/plugin.py index beff47e..f2b5ed1 100644 --- a/ckanext/googleanalytics/plugin.py +++ b/ckanext/googleanalytics/plugin.py @@ -21,7 +21,7 @@ class GoogleAnalyticsPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IPackageController, inherit=True) plugins.implements(plugins.IResourceController, inherit=True) plugins.implements(plugins.ITemplateHelpers) - + analytics_queue = queue.Queue() # IActions @@ -30,9 +30,9 @@ def get_actions(self): "resource_stats": ga_actions.resource_stat, "package_stats": ga_actions.package_stat, "url_stats": ga_actions.url_stat, - "download_package_stats": ga_actions.download_package_stat + "download_package_stats": ga_actions.download_package_stat, } - + # IBlueprint def get_blueprint(self): return blueprints @@ -48,7 +48,7 @@ def configure(self, config): "Missing googleanalytics.measurement_id in config. One must be set." ) # TODO: Do we still need to submit `gogleanalytics_id` separately? - self.googleanalytics_id = config.get('googleanalytics.measurement_id') + self.googleanalytics_id = config.get("googleanalytics.measurement_id") self.googleanalytics_domain = config.get( "googleanalytics.domain", "auto" ) @@ -70,9 +70,9 @@ def configure(self, config): # to the config dict, otherwise templates seem to get 'true' when they # try to read resource_prefix from config. if "googleanalytics_resource_prefix" not in config: - config[ - "googleanalytics_resource_prefix" - ] = DEFAULT_RESOURCE_URL_TAG + config["googleanalytics_resource_prefix"] = ( + DEFAULT_RESOURCE_URL_TAG + ) self.googleanalytics_resource_prefix = config[ "googleanalytics_resource_prefix" ] @@ -96,7 +96,7 @@ def configure(self, config): t = AnalyticsPostThread(self.analytics_queue) t.setDaemon(True) t.start() - + # IConfigurer def update_config(self, config): plugins.toolkit.add_template_directory(config, "../templates") @@ -124,7 +124,7 @@ def get_helpers(self): "googleanalytics_header": self._googleanalytics_header, "get_package_stats": ga_helpers.get_package_stats, "get_resource_stats": ga_helpers.get_resource_stats, - "get_url_stats": ga_helpers.get_url_stats + "get_url_stats": ga_helpers.get_url_stats, } def _googleanalytics_header(self): @@ -149,7 +149,7 @@ def _googleanalytics_header(self): "googleanalytics_domain": self.googleanalytics_domain, "googleanalytics_fields": str(self.googleanalytics_fields), "googleanalytics_linked_domains": self.googleanalytics_linked_domains, - "googleanalytics_measurement_id": self.googleanalytics_measurment_id + "googleanalytics_measurement_id": self.googleanalytics_measurment_id, } return plugins.toolkit.render_snippet( "googleanalytics/snippets/googleanalytics_header.html", data diff --git a/ckanext/googleanalytics/utils/__init__.py b/ckanext/googleanalytics/utils/__init__.py index a297a9f..ef0be09 100644 --- a/ckanext/googleanalytics/utils/__init__.py +++ b/ckanext/googleanalytics/utils/__init__.py @@ -3,10 +3,12 @@ import ckan.plugins.toolkit as tk -DEFAULT_RESOURCE_URL_TAG = "/downloads/" -DEFAULT_RECENT_VIEW_DAYS = 14 -RESOURCE_URL_REGEX = re.compile("(/.*/)dataset/([a-z0-9-_]+)/resource/([a-z0-9-_]+)") -PACKAGE_URL = "/dataset/" +DEFAULT_RESOURCE_URL_TAG = "/downloads/" +DEFAULT_RECENT_VIEW_DAYS = 14 +RESOURCE_URL_REGEX = re.compile( + "(/.*/)dataset/([a-z0-9-_]+)/resource/([a-z0-9-_]+)" +) +PACKAGE_URL = "/dataset/" def _resource_url_tag(): @@ -20,4 +22,4 @@ def _recent_view_days(): tk.config.get( "googleanalytics.recent_view_days", DEFAULT_RECENT_VIEW_DAYS ) - ) \ No newline at end of file + ) diff --git a/ckanext/googleanalytics/utils/db.py b/ckanext/googleanalytics/utils/db.py index 94a9a19..d63a09e 100644 --- a/ckanext/googleanalytics/utils/db.py +++ b/ckanext/googleanalytics/utils/db.py @@ -38,7 +38,7 @@ def _update_visits(table_name, item_id, recently, ever): "visits_ever": ever, } connection.execute(stats.insert().values(**values)) - + model.Session.commit() @@ -47,8 +47,6 @@ def _get_visits(table_name, item_id): stats = _get_table(table_name) id_col_name = "%s_id" % table_name[: -len("_stats")] id_col = getattr(stats.c, id_col_name) - s = select( - [stats.c.visits_ever] - ).where(id_col == item_id) + s = select([stats.c.visits_ever]).where(id_col == item_id) res = connection.execute(s).fetchone() return res and res or [0] diff --git a/ckanext/googleanalytics/utils/ga.py b/ckanext/googleanalytics/utils/ga.py index ac97ccb..88588b6 100644 --- a/ckanext/googleanalytics/utils/ga.py +++ b/ckanext/googleanalytics/utils/ga.py @@ -7,12 +7,7 @@ import ckan.model as model import ckan.plugins.toolkit as tk -from . import ( - RESOURCE_URL_REGEX, - PACKAGE_URL, - _recent_view_days, - db -) +from . import RESOURCE_URL_REGEX, PACKAGE_URL, _recent_view_days, db config = tk.config @@ -21,11 +16,15 @@ def _init_service(credentials_path): scopes = ["https://www.googleapis.com/auth/analytics.readonly"] - credentials = ServiceAccountCredentials.from_json_keyfile_name(credentials_path, scopes) + credentials = ServiceAccountCredentials.from_json_keyfile_name( + credentials_path, scopes + ) http = httplib2.Http() http = credentials.authorize(http) - - service = build('analyticsdata', 'v1beta', http=http, cache_discovery=False) + + service = build( + "analyticsdata", "v1beta", http=http, cache_discovery=False + ) return service @@ -33,28 +32,37 @@ def _get_packages_data(service): packages = {} property_id = tk.config.get("googleanalytics.property_id") dates = { - "recent": {"startDate": "{}daysAgo".format(_recent_view_days()), "endDate": "today"}, - "ever": {"startDate": "2024-01-01", "endDate": "today"} + "recent": { + "startDate": "{}daysAgo".format(_recent_view_days()), + "endDate": "today", + }, + "ever": {"startDate": "2024-01-01", "endDate": "today"}, } for date_name, date in dates.items(): request_body = { - "requests": [{ - "dateRanges": [date], - "metrics": [{"name": "eventCount"}], - "dimensions": [{"name": "eventName"}, {"name": "linkUrl"}] - }] + "requests": [ + { + "dateRanges": [date], + "metrics": [{"name": "eventCount"}], + "dimensions": [{"name": "eventName"}, {"name": "linkUrl"}], + } + ] } - - response = service.properties().batchRunReports( - body=request_body, property='properties/{}'.format(property_id) - ).execute() - - for report in response.get('reports', []): - for row in report.get('rows', []): - event_category = row['dimensionValues'][0].get('value', '') - event_label = row['dimensionValues'][1].get('value', '') - event_count = row['metricValues'][0].get('value', 0) + + response = ( + service.properties() + .batchRunReports( + body=request_body, property="properties/{}".format(property_id) + ) + .execute() + ) + + for report in response.get("reports", []): + for row in report.get("rows", []): + event_category = row["dimensionValues"][0].get("value", "") + event_label = row["dimensionValues"][1].get("value", "") + event_count = row["metricValues"][0].get("value", 0) if event_category == "file_download": package = event_label @@ -62,33 +70,43 @@ def _get_packages_data(service): if "/" in package: if not package.startswith(PACKAGE_URL): package = "/" + "/".join(package.split("/")[2:]) - + val = 0 - if package in packages and date_name in packages[package]: + if ( + package in packages + and date_name in packages[package] + ): val += packages[package][date_name] - packages.setdefault(package, {})[date_name] = int(count) + val + packages.setdefault(package, {})[date_name] = ( + int(count) + val + ) return packages def _save_packages_data(packages_data): """Save tuples of packages_data to the database""" + def save_package(package_id, visits): - db._update_visits("package_stats", package_id, visits["recent"], visits["ever"]) + db._update_visits( + "package_stats", package_id, visits["recent"], visits["ever"] + ) log.info("Updated package %s with %s visits" % (package_id, visits)) def save_resource(resource_id, visits): - db._update_visits("resource_stats", resource_id, visits["recent"], visits["ever"]) + db._update_visits( + "resource_stats", resource_id, visits["recent"], visits["ever"] + ) log.info("Updated resource %s with %s visits" % (resource.id, visits)) - + packages = {} for identifier, visits in packages_data.items(): matches = RESOURCE_URL_REGEX.match(identifier) - + if matches: resource_url = identifier.replace(matches.group(1), "") package_id = matches.group(2) resource_id = matches.group(3) - + connection = model.Session.connection() resource = ( model.Session.query(model.Resource) @@ -102,7 +120,7 @@ def save_resource(resource_id, visits): # we have a valid resource, we save it save_resource(resource.id, visits) - + # each resource is associated with a dataset/package it belongs to # therefore to update a package, we watch their corresponding resources if package_id in packages: @@ -111,40 +129,52 @@ def save_resource(resource_id, visits): else: packages[package_id] = { "recent": visits["recent"], - "ever": visits["ever"] + "ever": visits["ever"], } - + # update packages for package_id, visits in packages.items(): save_package(package_id, visits) - + def _get_urls_data(service): urls = {} property_id = tk.config.get("googleanalytics.property_id") dates = { - "recent": {"startDate": "{}daysAgo".format(_recent_view_days()), "endDate": "today"}, - "ever": {"startDate": "2024-01-01", "endDate": "today"} + "recent": { + "startDate": "{}daysAgo".format(_recent_view_days()), + "endDate": "today", + }, + "ever": {"startDate": "2024-01-01", "endDate": "today"}, } - + for date_name, date in dates.items(): request_body = { - "requests": [{ - "dateRanges": [date], - "metrics": [{"name": "eventCount"}], - "dimensions": [{"name": "eventName"}, {"name": "pagePath"}] - }] + "requests": [ + { + "dateRanges": [date], + "metrics": [{"name": "eventCount"}], + "dimensions": [ + {"name": "eventName"}, + {"name": "pagePath"}, + ], + } + ] } - - response = service.properties().batchRunReports( - body=request_body, property='properties/{}'.format(property_id) - ).execute() - - for report in response.get('reports', []): - for row in report.get('rows', []): - event_category = row['dimensionValues'][0].get('value', '') - event_label = row['dimensionValues'][1].get('value', '') - event_count = row['metricValues'][0].get('value', 0) + + response = ( + service.properties() + .batchRunReports( + body=request_body, property="properties/{}".format(property_id) + ) + .execute() + ) + + for report in response.get("reports", []): + for row in report.get("rows", []): + event_category = row["dimensionValues"][0].get("value", "") + event_label = row["dimensionValues"][1].get("value", "") + event_count = row["metricValues"][0].get("value", 0) if event_category == "page_view": url = event_label @@ -167,9 +197,11 @@ def _save_urls_data(urls_data): else: urls[url_id] = { "recent": visits.get("recent", 0), - "ever": visits.get("ever", 0) + "ever": visits.get("ever", 0), } - + for url_id, visits in urls.items(): - db._update_visits("url_stats", url_id, visits["recent"], visits["ever"]) + db._update_visits( + "url_stats", url_id, visits["recent"], visits["ever"] + ) log.info("Updated URL path %s with %s visits" % (url_id, visits)) diff --git a/ckanext/googleanalytics/utils/numerize.py b/ckanext/googleanalytics/utils/numerize.py index beaf9ba..298a2bc 100644 --- a/ckanext/googleanalytics/utils/numerize.py +++ b/ckanext/googleanalytics/utils/numerize.py @@ -1,11 +1,14 @@ def numerize(num): - ''' + """ Shows a number in a human readable format. Source: https://stackoverflow.com/a/45846841 - ''' - num = float('{:.3g}'.format(num)) + """ + num = float("{:.3g}".format(num)) magnitude = 0 while abs(num) >= 1000: magnitude += 1 num /= 1000.0 - return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude]) + return "{}{}".format( + "{:f}".format(num).rstrip("0").rstrip("."), + ["", "K", "M", "B", "T"][magnitude], + ) From 70afee63376ca29b9c4f953ab7eff489512a2569 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 07:23:55 +0100 Subject: [PATCH 13/15] Bug fix. --- ckanext/googleanalytics/actions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/googleanalytics/actions.py b/ckanext/googleanalytics/actions.py index 81d8b93..6e8d6d1 100644 --- a/ckanext/googleanalytics/actions.py +++ b/ckanext/googleanalytics/actions.py @@ -60,5 +60,5 @@ def download_package_stat(context, data_dict): Download package stats from Google analytics into the local database """ credentials = data_dict["credentials_path"] - packages_data = ckanext.googleanalytics(credentials) + packages_data = load_package_stats(credentials) return json.dumps({"package_count": len(packages_data)}) From d398ed0da9ed60d4ee9c7c18a21fab9bc94d1ee3 Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 07:36:58 +0100 Subject: [PATCH 14/15] Added the action to fetch latest pages views counts. --- ckanext/googleanalytics/actions.py | 11 ++++++++++- ckanext/googleanalytics/plugin.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/ckanext/googleanalytics/actions.py b/ckanext/googleanalytics/actions.py index 6e8d6d1..4394314 100644 --- a/ckanext/googleanalytics/actions.py +++ b/ckanext/googleanalytics/actions.py @@ -8,7 +8,7 @@ get_resource_stat, get_url_stat, ) -from ckanext.googleanalytics.logic import load_package_stats +from ckanext.googleanalytics.logic import load_package_stats, load_url_stats log = logging.getLogger(__name__) @@ -62,3 +62,12 @@ def download_package_stat(context, data_dict): credentials = data_dict["credentials_path"] packages_data = load_package_stats(credentials) return json.dumps({"package_count": len(packages_data)}) + + +def download_url_stat(context, data_dict): + """ + Download URL stats from Google analytics into the local database + """ + credentials = data_dict["credentials_path"] + urls_data = load_url_stats(credentials) + return json.dumps({"url_count": len(urls_data)}) diff --git a/ckanext/googleanalytics/plugin.py b/ckanext/googleanalytics/plugin.py index f2b5ed1..878f0fa 100644 --- a/ckanext/googleanalytics/plugin.py +++ b/ckanext/googleanalytics/plugin.py @@ -31,6 +31,7 @@ def get_actions(self): "package_stats": ga_actions.package_stat, "url_stats": ga_actions.url_stat, "download_package_stats": ga_actions.download_package_stat, + "download_url_stats": ga_actions.download_url_stat, } # IBlueprint From 2ec28f5d709fea38b200d3dd5f6924c957cd2e1c Mon Sep 17 00:00:00 2001 From: Ntwali B Date: Thu, 28 Nov 2024 07:39:28 +0100 Subject: [PATCH 15/15] Removed old tests -- to be filled by new tests. --- tests/__init__.py | 0 tests/accountsfixture.xml | 1 - tests/downloadfixture.xml | 8 --- tests/mockgoogleanalytics.py | 61 ------------------- tests/packagefixture.xml | 9 --- tests/test_general.py | 112 ----------------------------------- 6 files changed, 191 deletions(-) delete mode 100644 tests/__init__.py delete mode 100644 tests/accountsfixture.xml delete mode 100644 tests/downloadfixture.xml delete mode 100644 tests/mockgoogleanalytics.py delete mode 100644 tests/packagefixture.xml delete mode 100644 tests/test_general.py diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/accountsfixture.xml b/tests/accountsfixture.xml deleted file mode 100644 index 565ad08..0000000 --- a/tests/accountsfixture.xml +++ /dev/null @@ -1 +0,0 @@ -2011-03-13T01:59:55.435-08:00http://www.google.com/analytics/feeds/accounts/seb.bacon@okfn.orgGoogle AnalyticsGoogle Analytics ga:visitorType==New Visitorga:visitorType==Returning Visitorga:medium==cpa,ga:medium==cpc,ga:medium==cpm,ga:medium==cpp,ga:medium==cpv,ga:medium==ppcga:medium==organicga:medium==cpa,ga:medium==cpc,ga:medium==cpm,ga:medium==cpp,ga:medium==cpv,ga:medium==organic,ga:medium==ppcga:medium==(none)ga:medium==referralga:goalCompletionsAll>0ga:transactions>0ga:isMobile==Yesga:bounces==011300http://www.google.com/analytics/feeds/accounts/ga:42156377ga:421563772011-03-13T01:59:55.435-08:00borfProfile list for seb.bacon@okfn.org diff --git a/tests/downloadfixture.xml b/tests/downloadfixture.xml deleted file mode 100644 index 6db89d5..0000000 --- a/tests/downloadfixture.xml +++ /dev/null @@ -1,8 +0,0 @@ -http://www.google.com/analytics/feeds/data?ids=ga:42156377&dimensions=ga:pagePath&metrics=ga:newVisits,ga:uniquePageviews,ga:visitors,ga:visits&filters=ga:pagePath%3D~%5E/downloads/&start-date=2011-03-22&end-date=2011-04-05datagm.staging.ckan.net/ga:421563772011-04-05T03:08:58.420-07:00false - -http://www.google.com/analytics/feeds/data?ids=ga:42156377&ga:pagePath=/downloads/http%3A%2F%2Fwww.annakarenina.com%2Findex.json&filters=ga:pagePath%3D~%5E/downloads/&start-date=2011-03-22&end-date=2011-04-052011-04-04T17:00:00.001-07:00ga:pagePath=/downloads/http%3A%2F%2Fwww.annakarenina.com%2Findex.json - - -missingthing2011-04-04T17:00:00.001-07:00ga:pagePath=/downloads/missingthing - -Google Analytics1Google Analytics Data for Profile 42156377Google Analytics2011-04-05432011-03-2210000 diff --git a/tests/mockgoogleanalytics.py b/tests/mockgoogleanalytics.py deleted file mode 100644 index 1babbf5..0000000 --- a/tests/mockgoogleanalytics.py +++ /dev/null @@ -1,61 +0,0 @@ -import os -import BaseHTTPServer -import threading - -here_dir = os.path.dirname(os.path.abspath(__file__)) - - -class MockHandler(BaseHTTPServer.BaseHTTPRequestHandler): - def do_GET(self): - if "feeds/accounts/default" in self.path: - self.send_response(200) - self.end_headers() - fixture = os.path.join(here_dir, "accountsfixture.xml") - content = open(fixture, "r").read() - elif "analytics/feeds/data" in self.path: - if "dataset" in self.path: - fixture = os.path.join(here_dir, "packagefixture.xml") - elif "download" in self.path: - fixture = os.path.join(here_dir, "downloadfixture.xml") - self.send_response(200) - self.end_headers() - content = open(fixture, "r").read() - else: - self.send_response(200) - self.end_headers() - content = "empty" - self.wfile.write(content) - - def do_POST(self): - if "ClientLogin" in self.path: - self.send_response(200) - self.end_headers() - content = "Auth=blah" - else: - self.send_response(200) - self.end_headers() - content = "empty" - self.wfile.write(content) - - def do_QUIT(self): - self.send_response(200) - self.end_headers() - self.server.stop = True - - -class ReusableServer(BaseHTTPServer.HTTPServer): - allow_reuse_address = 1 - - def serve_til_quit(self): - self.stop = False - while not self.stop: - self.handle_request() - - -def runmockserver(): - server_address = ("localhost", 6969) - httpd = ReusableServer(server_address, MockHandler) - httpd_thread = threading.Thread(target=httpd.serve_til_quit) - httpd_thread.setDaemon(True) - httpd_thread.start() - return httpd_thread diff --git a/tests/packagefixture.xml b/tests/packagefixture.xml deleted file mode 100644 index 26a5417..0000000 --- a/tests/packagefixture.xml +++ /dev/null @@ -1,9 +0,0 @@ -http://www.google.com/analytics/feeds/data?ids=ga:42156377&dimensions=ga:pagePath&metrics=ga:newVisits,ga:uniquePageviews,ga:visitors,ga:visits&filters=ga:pagePath%3D~%5E/dataset/&start-date=2011-03-22&end-date=2011-04-05datagm.staging.ckan.net/ga:421563772011-04-05T03:08:57.106-07:00false - -http://www.google.com/analytics/feeds/data?ids=ga:42156377&ga:pagePath=/dataset/annakarenina&filters=ga:pagePath%3D~%5E/dataset/&start-date=2011-03-22&end-date=2011-04-052011-04-04T17:00:00.001-07:00ga:pagePath=/dataset/annakarenina - -http://www.google.com/analytics/feeds/data?ids=ga:42156377&ga:pagePath=/dataset/annakarenina/invalid&filters=ga:pagePath%3D~%5E/dataset/&start-date=2011-03-22&end-date=2011-04-052011-04-04T17:00:00.001-07:00ga:pagePath=/dataset/annakarenina/invalid - -http://www.google.com/analytics/feeds/data?ids=ga:42156377&ga:pagePath=/dataset/annakarenina-invalid&filters=ga:pagePath%3D~%5E/dataset/&start-date=2011-03-22&end-date=2011-04-052011-04-04T17:00:00.001-07:00ga:pagePath=/dataset/annakarenina-invalid - -Google Analytics1Google Analytics Data for Profile 42156377Google Analytics2011-04-051522011-03-2210000 diff --git a/tests/test_general.py b/tests/test_general.py deleted file mode 100644 index c958376..0000000 --- a/tests/test_general.py +++ /dev/null @@ -1,112 +0,0 @@ -import httplib -from unittest import TestCase - -from ckan.config.middleware import make_app -from paste.deploy import appconfig -import paste.fixture -from ckan.tests import conf_dir, url_for, CreateTestData - -from mockgoogleanalytics import runmockserver -from ckanext.googleanalytics.commands import LoadAnalytics -from ckanext.googleanalytics.commands import InitDB -from ckanext.googleanalytics.utils import db as db_utils -import ckanext.googleanalytics.gasnippet as gasnippet - - -class MockClient(httplib.HTTPConnection): - def request(self, http_request): - filters = http_request.uri.query.get("filters") - path = http_request.uri.path - if filters: - if "dataset" in filters: - path += "/dataset" - else: - path += "/download" - httplib.HTTPConnection.request(self, http_request.method, path) - resp = self.getresponse() - return resp - - -class TestConfig(TestCase): - def test_config(self): - config = appconfig("config:test.ini", relative_to=conf_dir) - config.local_conf["ckan.plugins"] = "googleanalytics" - config.local_conf["googleanalytics.id"] = "" - command = LoadAnalytics("loadanalytics") - command.CONFIG = config.local_conf - self.assertRaises(Exception, command.run, []) - - -class TestLoadCommand(TestCase): - @classmethod - def setup_class(cls): - InitDB("initdb").run([]) # set up database tables - - config = appconfig("config:test.ini", relative_to=conf_dir) - config.local_conf["ckan.plugins"] = "googleanalytics" - config.local_conf["googleanalytics.username"] = "borf" - config.local_conf["googleanalytics.password"] = "borf" - config.local_conf["googleanalytics.id"] = "UA-borf-1" - config.local_conf["googleanalytics.show_downloads"] = "true" - cls.config = config.local_conf - wsgiapp = make_app(config.global_conf, **config.local_conf) - env = { - "HTTP_ACCEPT": ( - "text/html;q=0.9,text/plain;" "q=0.8,image/png,*/*;q=0.5" - ) - } - cls.app = paste.fixture.TestApp(wsgiapp, extra_environ=env) - CreateTestData.create() - runmockserver() - - @classmethod - def teardown_class(cls): - CreateTestData.delete() - conn = httplib.HTTPConnection("localhost:%d" % 6969) - conn.request("QUIT", "/") - conn.getresponse() - - def test_analytics_snippet(self): - response = self.app.get(url_for(controller="tag", action="index")) - code = gasnippet.header_code % ( - self.config["googleanalytics.id"], - "auto", - ) - assert code in response.body - - def test_download_count_inserted(self): - command = LoadAnalytics("loadanalytics") - command.TEST_HOST = MockClient("localhost", 6969) - command.CONFIG = self.config - command.run([]) - response = self.app.get( - url_for(controller="package", action="read", id="annakarenina") - ) - assert "[downloaded 4 times]" in response.body - - def test_js_inserted_resource_view(self): - from nose import SkipTest - - raise SkipTest("Test won't work until CKAN 1.5.2") - - from ckan.logic.action import get - from ckan import model - - context = {"model": model, "ignore_auth": True} - data = {"id": "annakarenina"} - pkg = get.package_show(context, data) - resource_id = pkg["resources"][0]["id"] - - command = LoadAnalytics("loadanalytics") - command.TEST_HOST = MockClient("localhost", 6969) - command.CONFIG = self.config - command.run([]) - response = self.app.get( - url_for( - controller="package", - action="resource_read", - id="annakarenina", - resource_id=resource_id, - ) - ) - assert 'onclick="javascript: _gaq.push(' in response.body