Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ff9d802
Create build_kg_index.py
mhs62 Apr 23, 2024
fb7bef8
Create load_kg_index.py
mhs62 Apr 23, 2024
89e5b0b
Create README.md
mhs62 Apr 23, 2024
fc333ff
Update build_kg_index.py
mhs62 Apr 23, 2024
0457dc4
Update load_kg_index.py
mhs62 Apr 23, 2024
f343a97
Update README.md
mhs62 Apr 23, 2024
45ef3e7
Create build_kg_multilevel_index.py
mhs62 Apr 26, 2024
89347fc
Create load_kg_multilevel_index.py
mhs62 Apr 26, 2024
63fe644
Update load_kg_index.py
mhs62 Apr 26, 2024
694cbb2
renamed
mhs62 Apr 26, 2024
a15f500
Create build_cp2endpoints_indx.py
mhs62 Apr 26, 2024
ca4fc41
Update build_cp2endpoints_indx.py
mhs62 Apr 26, 2024
f18f0c8
Create build_c2endpoints_indx.py
mhs62 Jun 7, 2024
371ffff
Update build_cp2endpoints_indx.py
mhs62 Jun 7, 2024
e1dc4aa
Create analyse_sparql.py
mhs62 Jun 7, 2024
46463d6
Create process_sparql.py
mhs62 Jun 11, 2024
8c26aec
Create build_p2endpoints_indx.py
mhs62 Jun 12, 2024
287a8ac
load_c_indx is renamed as load_single_indx
mhs62 Jun 12, 2024
70be3d0
Update build_cp2endpoints_indx.py
mhs62 Jun 14, 2024
6e75836
renamed
mhs62 Jun 14, 2024
6ff68b9
Update analyse_sparql.py
mhs62 Jun 14, 2024
5c9b1bb
Update process_sparql.py
mhs62 Jun 14, 2024
a980fe9
moved to backup_extra
mhs62 Jun 14, 2024
bd25f4d
Create BuildInvertedIndex.java
mhs62 Jun 27, 2024
6774e6e
Create ProcessQuery.java
mhs62 Jun 27, 2024
cc75a50
Update pom.xml
mhs62 Jun 27, 2024
de0ee5c
Update TimeSeriesRDBClientIntegrationTest.java
mhs62 Jun 27, 2024
5336db4
Update BuildInvertedIndex.java
mhs62 Jul 2, 2024
bde4234
Create stopcps.json
mhs62 Jul 2, 2024
eed40fc
Update ProcessQuery.java
mhs62 Jul 2, 2024
2625698
Update BuildInvertedIndex.java
mhs62 Jul 2, 2024
c5174b7
Update ProcessQuery.java
mhs62 Jul 3, 2024
fa7d05e
Update ProcessQuery.java
mhs62 Jul 3, 2024
fcb252b
Update ProcessQuery.java
mhs62 Jul 26, 2024
17f20a8
Update BuildInvertedIndex.java
mhs62 Jul 26, 2024
74e1970
Update ProcessQuery.java
mhs62 Oct 16, 2024
4c58ea1
Revert "Update ProcessQuery.java"
mhs62 Oct 16, 2024
f8010b3
dev-federated-query-performance-improvement: a method has been renamed
mhs62 Oct 30, 2024
97d9dcb
dev-federated-query-performance-improvement: BuildInvertedIndex has b…
mhs62 Oct 30, 2024
cc236d8
dev-federated-query-performance-improvement: integration is tested
mhs62 Oct 30, 2024
1b64182
dev-federated-query-performance-improvement: add test method on crea…
mhs62 Oct 30, 2024
6573ee3
dev-federated-query-performance-improvement: extraction of triples fr…
mhs62 Nov 1, 2024
6788091
Create csv2tif.py
mhs62 Jan 31, 2025
afddd85
Create tif2csv.py
mhs62 Jan 31, 2025
a92c4f6
Merge remote-tracking branch 'origin/main' into dev-federated-query-p…
Ushcode Feb 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 58 additions & 5 deletions JPS_BASE_LIB/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@

<!-- Project Properties -->
<properties>
<!-- Most of these are set in the parent pom -->
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>

<!-- Parent POM -->
<parent>
<groupId>uk.ac.cam.cares.jps</groupId>
Expand All @@ -43,9 +44,9 @@
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
<release>8</release>
<source>17</source>
<target>17</target>
<release>17</release>
<compilerArgs>
<arg>-Xpkginfo:always</arg>
</compilerArgs>
Expand Down Expand Up @@ -226,11 +227,27 @@
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.14</version>
</dependency>

<!-- ??? -->
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-iri</artifactId>
<version>4.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-base</artifactId>
<version>4.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-arq</artifactId>
<version>4.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
Expand Down Expand Up @@ -410,7 +427,43 @@
<groupId>org.glassfish.jersey.core</groupId>
<artifactId>jersey-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>apache-jena-libs</artifactId>
<version>4.8.0</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-core</artifactId>
<version>4.8.0</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-tdb</artifactId>
<version>4.8.0</version>
</dependency>
<dependency>
<groupId>org.eclipse.rdf4j</groupId>
<artifactId>rdf4j-bom</artifactId>
<version>3.0.4</version>
<type>pom</type>
<scope>import</scope>
</dependency>

</dependencies>

<repositories>
<repository>
<id>oss.sonatype.org-snapshot</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>

</project>
54 changes: 54 additions & 0 deletions JPS_BASE_LIB/python_federated_query/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Description #

The `build_kg_index.py` module builds an inverted index from concepts to the files that contain them, given a base directory. It automatically processes the files in that directory and in all of its sub-folders, and saves the inverted index in a file called `inverted_index.json`. The `load_kg_index.py` module, in turn, loads an index from a saved file and produces a bar chart showing the concept-to-file associations.

# Installation

You need to install rdflib and matplotlib for this purpose.

## Virtual environment setup

It is highly recommended to use a virtual environment (https://docs.python.org/3/tutorial/venv.html) for running these modules.
The virtual environment can be created as follows:

`(Windows)`

```cmd
$ python -m venv <venv_name>
$ <venv_name>\Scripts\activate.bat
(<venv_name>) $
```

`(Linux)`
```sh
$ python3 -m venv <venv_name>
$ source <venv_name>/bin/activate
(<venv_name>) $
```

The above commands will create and activate the virtual environment `<venv_name>` in the current directory.


## Installation of required libraries

To install `rdflib` and `matplotlib`, simply run the following command:

```sh
(<venv_name>) $ pip install rdflib matplotlib
```

The above command will install the `rdflib` and `matplotlib` packages.


# Requirements #



# Command line interface usage #

## Converter CLI



# Authors #
Md Hanif Seddiqui (mhs62@cam.ac.uk), 23 April 2024
159 changes: 159 additions & 0 deletions JPS_BASE_LIB/python_federated_query/analyse_sparql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.algebra import translateQuery
from rdflib import URIRef, BNode
import rdflib.plugins.sparql.algebra as algebra
import json

class AnalyseSparql:
    """Work out which endpoints a SPARQL query touches, using inverted indices.

    The query is parsed and translated to rdflib's SPARQL algebra at
    construction time. URIs found in its triple patterns are then looked up
    in JSON index files (``cinv.indx``, ``pinv.indx``, ``cpinv.indx``) whose
    top-level keys are URI strings and whose values are iterables of
    endpoints.
    """

    def __init__(self, sparql_query):
        """Parse *sparql_query* and start with empty indices.

        Args:
            sparql_query (str): The SPARQL query text to analyse.
        """
        self.query_object = translateQuery(parseQuery(sparql_query))
        self.class_index_file_path = ""
        self.property_index_file_path = ""
        self.cp_index_file_path = ""
        self.classes = set()
        self.properties = set()
        self.class_index = {}
        self.property_index = {}
        self.cp_index = {}

    def set_index_location(self, index_dir):
        """Derive the three index file paths from a directory.

        Args:
            index_dir (str): Directory holding the index files. Surrounding
                whitespace is ignored and a trailing ``/`` is added when
                missing. An empty value yields bare file names, i.e. paths
                relative to the current working directory.
        """
        index_dir = index_dir.strip()
        # Guard against the empty string: indexing its last character would
        # raise IndexError, and appending "/" would silently point at root.
        if index_dir and not index_dir.endswith("/"):
            index_dir += "/"

        self.class_index_file_path = index_dir + "cinv.indx"
        self.property_index_file_path = index_dir + "pinv.indx"
        self.cp_index_file_path = index_dir + "cpinv.indx"

    def extract_classes_and_properties(self):
        """Collect the class and property URIs used by the parsed query.

        Returns:
            tuple: ``(classes, properties)`` - the instance's own two sets of
            URIRefs, filled by walking the query algebra. The sets are
            accumulated on the instance, so repeated calls add to them.
        """
        self.traverse_query_tree(self.query_object.algebra)
        return self.classes, self.properties

    def traverse_query_tree(self, node):
        """Recursively harvest URIs from the triple patterns under *node*.

        Args:
            node: An algebra node (or any object); nodes without the
                inspected attributes are simply ignored.
        """
        triples = getattr(node, "triples", None)
        if triples is not None:
            for s, p, o in triples:
                if isinstance(s, URIRef):
                    self.classes.add(s)
                if isinstance(p, URIRef):
                    self.properties.add(p)
                # Heuristic: an http(s) URI in object position is treated
                # as a class.
                if isinstance(o, URIRef) and o.startswith("http"):
                    self.classes.add(o)

        # Attribute names under which child nodes are looked for.
        for attr in ("p", "p1", "p2", "expr"):
            if not hasattr(node, attr):
                continue
            child = getattr(node, attr)
            children = child if isinstance(child, list) else [child]
            for item in children:
                self.traverse_query_tree(item)

    @staticmethod
    def _read_json_index(path, success_message):
        """Load JSON from *path*; print the outcome and return the data.

        Returns ``None`` (after printing a diagnostic) when the file is
        missing or does not contain valid JSON, so callers can keep their
        current index instead of crashing.
        """
        try:
            with open(path, "r", encoding="utf-8") as file:
                data = json.load(file)
        except FileNotFoundError:
            print(f"File '{path}' not found.")
            return None
        except json.JSONDecodeError:
            print(f"Error decoding JSON data from '{path}'.")
            return None
        print(success_message)
        return data

    def load_indices(self):
        """Load the class, property and class-property indices from disk."""
        self.load_class_index()
        self.load_property_index()
        self.load_concept2property_index()

    def load_class_index(self):
        """Load the class index; on failure the current index is kept."""
        loaded = self._read_json_index(
            self.class_index_file_path,
            f"Class Index loaded from {self.class_index_file_path}",
        )
        if loaded is not None:
            self.class_index = loaded

    def load_property_index(self):
        """Load the property index; on failure the current index is kept."""
        loaded = self._read_json_index(
            self.property_index_file_path,
            f"Property Index loaded from {self.property_index_file_path}",
        )
        if loaded is not None:
            self.property_index = loaded

    def load_concept2property_index(self):
        """Load the class-property index; on failure the current one is kept."""
        loaded = self._read_json_index(
            self.cp_index_file_path,
            "Class-Property multilevel inverted index loaded successfully.",
        )
        if loaded is not None:
            self.cp_index = loaded

    def get_endpoints(self):
        """Look up every class and property of the query in the indices.

        Progress and the final result are printed, as before; the result is
        now also returned so callers can use it programmatically.

        Returns:
            set: The union of all endpoints listed in the class index and
            the property index for the URIs used by the query.
        """
        endpoints = set()
        # Use this instance rather than a module-level variable, so the
        # method works no matter how (or where) the object was created.
        classes, properties = self.extract_classes_and_properties()

        print("Analysing Classes:")
        for class_uriref in classes:
            class_uri = str(class_uriref)
            if class_uri in self.class_index:
                print("Found class alignment: " + class_uri)
                endpoints.update(self.class_index[class_uri])
            else:
                print("Un-aligned class: " + class_uri)

        print("Analysing Properties:")
        for property_uriref in properties:
            property_uri = str(property_uriref)
            if property_uri in self.property_index:
                print("Found property alignment:" + property_uri)
                endpoints.update(self.property_index[property_uri])
            else:
                print("Un-aligned property: " + property_uri)

        print("The Final Endpoints: ")
        print(endpoints)
        return endpoints

# Example usage: a query that matches elements of the periodic-table ontology
# (pt:Element) against OntoKin elements by comparing the fragment of the
# pt element IRI with the OntoKin element's rdfs:label.
sparql_query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX pt: <http://www.daml.org/2003/01/periodictable/PeriodicTable.owl#>
PREFIX OntoKin: <http://www.theworldavatar.com/ontology/ontokin/OntoKin.owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?identifier ?atomicMass ?atomicMassUnits
WHERE {
?element1 rdf:type pt:Element .
BIND(STRAFTER(STR(?element1), "#") AS ?identifier)
?element2 rdf:type OntoKin:Element .
?element2 rdfs:label ?identifier1 .
?element2 OntoKin:hasAtomicMass ?atomicMass .
?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits .
FILTER(?identifier = ?identifier1)
}
"""

# usage: python analyse_sparql.py [index_directory]
if __name__ == "__main__":
    import sys

    # Default index directory; an optional first command-line argument
    # overrides it so the script is not tied to one machine's layout.
    index_location = 'C:/Users/printer_admin/Downloads/KGs/'
    if len(sys.argv) > 1:
        index_location = sys.argv[1]

    ana = AnalyseSparql(sparql_query)
    ana.set_index_location(index_location)
    ana.load_indices()
    ana.get_endpoints()


Loading
Loading