"""
This file defines a few constants which configure
which Wikibase instance and which property/item ids
should be used
"""

import re

# Endpoint of the MediaWiki API of the Wikibase instance
mediawiki_api_endpoint = 'https://www.wikidata.org/w/api.php'

# SPARQL endpoint
wikibase_sparql_endpoint = 'https://query.wikidata.org/sparql'

# Name of the Wikibase instance
wikibase_name = 'Wikidata'

# URL of the main page of the Wikibase instance
wikibase_main_page = 'https://www.wikidata.org/wiki/Wikidata:Main_Page'

# Wikibase namespace ID, used to search for items
# For Wikidata this is 0, but by default most Wikibase instances use 120,
# which is the default Wikibase 'Item:' namespace
# CHANGE THIS TO 120 if you are adapting this configuration file to another Wikibase
wikibase_namespace_id = 0

# Namespace prefix of Wikibase items (including colon, e.g. 'Item:')
wikibase_namespace_prefix = ''

# User agent to connect to the Wikidata APIs
user_agent = 'OpenRefine-Wikidata reconciliation interface'

# Regexes and group ids to extract Qids and Pids from URLs.
# The dots of the host name are escaped so that they only match a literal '.'
# (an unescaped '.' would accept any character, e.g. 'wwwXwikidata.org').
q_re = re.compile(r'(<?https?://www\.wikidata\.org/(entity|wiki)/)?(Q[0-9]+)>?')
q_re_group_id = 3
p_re = re.compile(r'(<?https?://www\.wikidata\.org/(entity/|wiki/Property:))?(P[0-9]+)>?')
p_re_group_id = 3

# Identifier space and schema space exposed to OpenRefine.
# This should match the IRI prefixes used in RDF serialization.
# Note that you should be careful about using http or https there,
# because any variation will break comparisons at various places.
identifier_space = 'http://www.wikidata.org/entity/'
schema_space = 'http://www.wikidata.org/prop/direct/'

# Pattern used to form the URL of a Qid.
# This is only used for viewing so it is fine to use any protocol (therefore, preferably HTTPS if supported)
qid_url_pattern = 'https://www.wikidata.org/wiki/{{id}}'

# By default, filter out any items which are instance
# of a subclass of this class.
# For Wikidata, this is "Wikimedia internal stuff".
# This filters out the disambiguation pages, categories, ...
# Set to None to disable this filter
avoid_items_of_class = 'Q17442446'


# Service name exposed at various places,
# mainly in the list of reconciliation services of users
service_name = 'DEV Wikidata'

# URL (without the trailing slash) where this server runs
this_host = 'http://localhost:8000'

# The default limit on the number of results returned by us
default_num_results = 25

# The maximum number of search results to retrieve from the Wikidata search API
wd_api_max_search_results = 50  # need a bot account to get more

# The matching score above which we should automatically match an item
validation_threshold = 95

# URI of the Redis instance used for caching at various places
# NOTE(review): 'ACTUAL_REDIS_PORT' looks like a placeholder that has to be
# replaced by the real port before this URI is usable - confirm.
redis_uri = 'redis://localhost:ACTUAL_REDIS_PORT/0?encoding=utf-8'

# Redis prefix to use in front of all keys
redis_key_prefix = 'openrefine_wikidata:'

# Headers for the HTTP requests made by the tool
headers = {
    'User-Agent': service_name + ' (OpenRefine-Wikibase reconciliation service)',
}

# Previewing settings

# Dimensions of the preview
zoom_ratio = 1.0
preview_height = 100
preview_width = 400

# Width which should be requested from Commons for the thumbnail
thumbnail_width = 130

# All properties to use to get an image. Set to empty list [] if no image properties are available.
image_properties = [
    'P18',
    'P14',
    'P15',
    'P158',
    'P181',
    'P242',
    'P1766',
    'P1801',
    'P1846',
    'P2713',
    'P2716',
    'P2910',
    'P3311',
    'P3383',
    'P3451',
    'P1621',
    'P154',
]

# URL pattern to retrieve an image from its filename
image_download_pattern = 'https://upload.wikimedia.org/wikipedia/commons/thumb/%s/%s/%s/%dpx-%s'

# Fallback URL of the image to use when previewing an item with no image
fallback_image_url = this_host + '/static/wikidata.png'

# Alt text of the fallback image
fallback_image_alt = 'Wikidata'

# Autodescribe endpoint to use.
# this is used to generate automatic descriptions from item contents.
# (disable this with: autodescribe_endpoint = None )
autodescribe_endpoint = 'https://tools.wmflabs.org/autodesc/'

# Property proposal settings

# Default type : entity (Q35120)
# Set to None if no such item exists.
default_type_entity = 'Q35120'

# Property path used to obtain the type of an item
type_property_path = 'P31'

# Property to follow to fetch properties for a given type.
# Set to None if this is not available
property_for_this_type_property = 'P1963'

# Optional prefix in front of properties in SPARQL-like property paths
wdt_prefix = 'wdt:'

# Sparql query used to fetch all the subclasses of a given item.
# The '$qid' string will be replaced by the qid whose children should be fetched.
sparql_query_to_fetch_subclasses = """
SELECT ?child WHERE { ?child wdt:P279* wd:$qid }
"""

# Sparql query used to fetch all the properties which store unique identifiers
sparql_query_to_fetch_unique_id_properties = """
SELECT ?pid WHERE { ?pid wdt:P31/wdt:P279* wd:Q19847637 }
"""

# Sparql query used to propose properties to fetch for items of a given class.
# Set to None if property proposal should be disabled.
sparql_query_to_propose_properties = """
SELECT ?prop ?propLabel ?depth WHERE {
SERVICE gas:service {
 gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" .
 gas:program gas:in wd:$base_type .
 gas:program gas:out ?out .
 gas:program gas:out1 ?depth .
 gas:program gas:maxIterations 10 .
 gas:program gas:maxVisited 100 .
 gas:program gas:linkType wdt:P279 .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "$lang" }
?out wdt:$property_for_this_type ?prop .
}
ORDER BY ?depth
LIMIT $limit
"""