cambridge-cares · mhs62 · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024
diff --git a/JPS_BASE_LIB/pom.xml b/JPS_BASE_LIB/pom.xml
@@ -14,9 +14,10 @@
 
     <!-- Project Properties -->
     <properties>
-        <!-- Most of these are set in the parent pom -->
+        <maven.compiler.source>17</maven.compiler.source>
+        <maven.compiler.target>17</maven.compiler.target>
     </properties>
-
+      
     <!-- Parent POM -->
     <parent>
         <groupId>uk.ac.cam.cares.jps</groupId>
@@ -43,9 +44,9 @@
             <plugin>
                 <artifactId>maven-compiler-plugin</artifactId>
                 <configuration>
-                    <source>1.8</source>
-                    <target>1.8</target>
-                    <release>8</release>
+                    <source>17</source>
+                    <target>17</target>
+                    <release>17</release>
                     <compilerArgs>
                         <arg>-Xpkginfo:always</arg>
                     </compilerArgs>
@@ -226,11 +227,27 @@
             <groupId>org.apache.httpcomponents</groupId>
             <artifactId>httpclient</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpcore</artifactId>
+            <version>4.4.14</version>
+        </dependency>
 
         <!-- ??? -->
+        <dependency>
+            <groupId>org.apache.jena</groupId>
+            <artifactId>jena-iri</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.jena</groupId>
+            <artifactId>jena-base</artifactId>
+            <version>4.8.0</version>
+        </dependency>
         <dependency>
             <groupId>org.apache.jena</groupId>
             <artifactId>jena-arq</artifactId>
+            <version>4.8.0</version>
         </dependency>
         <dependency>
             <groupId>org.apache.jena</groupId>
@@ -410,7 +427,43 @@
             <groupId>org.glassfish.jersey.core</groupId>
             <artifactId>jersey-common</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.apache.jena</groupId>
+            <artifactId>apache-jena-libs</artifactId>
+            <version>4.8.0</version>
+            <type>pom</type>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.jena</groupId>
+            <artifactId>jena-core</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.jena</groupId>
+            <artifactId>jena-tdb</artifactId>
+            <version>4.8.0</version>
+        </dependency>
+        <dependency> <!-- NOTE(review): Maven supports scope "import" only for a pom-type entry inside <dependencyManagement>; confirm whether this BOM should be declared there instead of in <dependencies>. -->
+            <groupId>org.eclipse.rdf4j</groupId>
+            <artifactId>rdf4j-bom</artifactId>
+            <version>3.0.4</version>
+            <type>pom</type>
+            <scope>import</scope>
+        </dependency>
+
     </dependencies>
 
+    <repositories>
+        <repository>
+            <id>oss.sonatype.org-snapshot</id>
+            <url>https://oss.sonatype.org/content/repositories/snapshots</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
 
 </project>
diff --git a/JPS_BASE_LIB/python_federated_query/README.md b/JPS_BASE_LIB/python_federated_query/README.md
@@ -0,0 +1,54 @@
+# Description #
+
+The `build_kg_index.py` module builds an inverted index from concepts to files for a given base directory. It automatically processes the files in that directory and in its sub-folders, and saves the inverted index in a file called `inverted_index.json`. The `load_kg_index.py` module, on the other hand, loads an index from a saved file and produces a bar chart of the concept-to-file associations.
+
+# Installation
+
+You need to install rdflib and matplotlib for this purpose.
+
+## Virtual environment setup
+
+It is highly recommended to use a virtual environment (https://docs.python.org/3/tutorial/venv.html) for running these modules.
+The virtual environment can be created as follows:
+
+`(Windows)`
+
+```cmd
+$ python -m venv <venv_name>
+$ <venv_name>\Scripts\activate.bat
+(<venv_name>) $
+```
+
+`(Linux)`
+```sh
+$ python3 -m venv <venv_name>
+$ source <venv_name>/bin/activate
+(<venv_name>) $
+```
+
+The above commands will create and activate the virtual environment `<venv_name>` in the current directory.
+
+
+## Installation of required libraries
+
+To install `rdflib` and `matplotlib`, simply run the following command:
+
+```sh
+(<venv_name>) $ pip install rdflib matplotlib
+```
+
+The above command will install the `rdflib` and `matplotlib` packages.
+
+
+# Requirements #
+
+
+
+# Command line interface usage #
+
+## Converter CLI
+
+
+
+# Authors #
+Md Hanif Seddiqui (mhs62@cam.ac.uk), 23 April 2024
diff --git a/JPS_BASE_LIB/python_federated_query/analyse_sparql.py b/JPS_BASE_LIB/python_federated_query/analyse_sparql.py
@@ -0,0 +1,209 @@
+import json
+import sys
+
+import rdflib.plugins.sparql.algebra as algebra
+from rdflib import URIRef, BNode
+from rdflib.plugins.sparql.algebra import translateQuery
+from rdflib.plugins.sparql.parser import parseQuery
+
+
+class AnalyseSparql:
+    """Find the indexed endpoints relevant to a SPARQL query.
+
+    The query is translated into rdflib's SPARQL algebra, the IRIs found in
+    its triple patterns are collected, and those IRIs are looked up in JSON
+    inverted indices (IRI -> endpoints) loaded from disk.
+    """
+
+    def __init__(self, sparql_query):
+        """Parse ``sparql_query`` and start with empty indices.
+
+        Args:
+            sparql_query (str): The SPARQL query text to analyse.
+        """
+        # Algebra form of the query; walked by traverse_query_tree().
+        self.query_object = translateQuery(parseQuery(sparql_query))
+        self.class_index_file_path = ""
+        self.property_index_file_path = ""
+        self.cp_index_file_path = ""
+        self.classes = set()
+        self.properties = set()
+        self.class_index = {}
+        self.property_index = {}
+        self.cp_index = {}
+
+    def set_index_location(self, index_dir):
+        """Derive the three index file paths from a directory.
+
+        Args:
+            index_dir (str): Directory holding ``cinv.indx``, ``pinv.indx``
+                and ``cpinv.indx``. Surrounding whitespace is ignored and a
+                trailing ``/`` is appended when missing. An empty value
+                leaves the three file names without a directory prefix.
+        """
+        index_dir = index_dir.strip()
+        # Only a non-empty directory gets a separator; indexing [-1] on an
+        # empty string would raise IndexError.
+        if index_dir and not index_dir.endswith("/"):
+            index_dir += "/"
+
+        self.class_index_file_path = index_dir + "cinv.indx"
+        self.property_index_file_path = index_dir + "pinv.indx"
+        self.cp_index_file_path = index_dir + "cpinv.indx"
+
+    def extract_classes_and_properties(self):
+        """Collect the class and property IRIs used by the parsed query.
+
+        Returns:
+            tuple: ``(classes, properties)``, the two sets of URIRefs
+            accumulated on this instance by traverse_query_tree().
+        """
+        self.traverse_query_tree(self.query_object.algebra)
+        return self.classes, self.properties
+
+    def traverse_query_tree(self, node):
+        """Recursively record the IRIs in the triple patterns under ``node``.
+
+        Subject IRIs and ``http``-prefixed object IRIs are recorded as
+        classes (a heuristic); predicate IRIs are recorded as properties.
+
+        Args:
+            node: An algebra node, or any value reached while descending.
+        """
+        triples = getattr(node, 'triples', None)
+        if triples is not None:
+            for s, p, o in triples:
+                if isinstance(s, URIRef):
+                    self.classes.add(s)
+                if isinstance(p, URIRef):
+                    self.properties.add(p)
+                if isinstance(o, URIRef) and o.startswith("http"):
+                    self.classes.add(o)  # Heuristic for class as object
+
+        # Descend into the attributes that commonly hold child nodes.
+        for attr in ('p', 'p1', 'p2', 'expr'):
+            if hasattr(node, attr):
+                child = getattr(node, attr)
+                children = child if isinstance(child, list) else [child]
+                for item in children:
+                    self.traverse_query_tree(item)
+
+    def load_indices(self):
+        """Load the class, property and class-property indices from disk."""
+        self.load_class_index()
+        self.load_property_index()
+        self.load_concept2property_index()
+
+    def _load_index(self, file_path, current, success_message):
+        """Read one JSON index file, reporting the outcome on stdout.
+
+        Args:
+            file_path (str): Location of the JSON index file.
+            current: Index to keep if the file cannot be loaded.
+            success_message (str): Text printed after a successful load.
+
+        Returns:
+            The parsed index, or ``current`` when the file is missing or
+            does not contain valid JSON.
+        """
+        try:
+            # Read as UTF-8 explicitly instead of the platform default.
+            with open(file_path, 'r', encoding='utf-8') as file:
+                loaded = json.load(file)
+        except FileNotFoundError:
+            print(f"File '{file_path}' not found.")
+            return current
+        except json.JSONDecodeError:
+            print(f"Error decoding JSON data from '{file_path}'.")
+            return current
+        print(success_message)
+        return loaded
+
+    def load_class_index(self):
+        """Load the class index from ``class_index_file_path``."""
+        self.class_index = self._load_index(
+            self.class_index_file_path,
+            self.class_index,
+            f"Class Index loaded from {self.class_index_file_path}",
+        )
+
+    def load_property_index(self):
+        """Load the property index from ``property_index_file_path``."""
+        self.property_index = self._load_index(
+            self.property_index_file_path,
+            self.property_index,
+            f"Property Index loaded from {self.property_index_file_path}",
+        )
+
+    def load_concept2property_index(self):
+        """Load the class-property index from ``cp_index_file_path``."""
+        self.cp_index = self._load_index(
+            self.cp_index_file_path,
+            self.cp_index,
+            "Class-Property multilevel inverted index loaded successfully.",
+        )
+
+    def get_endpoints(self):
+        """Print and return the endpoints matching the query's IRIs.
+
+        Returns:
+            set: Every endpoint listed in the class or property index for
+            a class or property IRI extracted from the query.
+        """
+        endpoints = set()
+        # Analyse this instance, not a module-level variable.
+        classes, properties = self.extract_classes_and_properties()
+
+        print("Analysing Classes:")
+        for class_uriref in classes:
+            class_uri = str(class_uriref)
+            if class_uri in self.class_index:
+                print("Found class alignment: " + class_uri)
+                endpoints.update(self.class_index[class_uri])
+            else:
+                print("Un-aligned class: " + class_uri)
+
+        print("Analysing Properties:")
+        for property_uriref in properties:
+            property_uri = str(property_uriref)
+            if property_uri in self.property_index:
+                print("Found property alignment:" + property_uri)
+                endpoints.update(self.property_index[property_uri])
+            else:
+                print("Un-aligned property: " + property_uri)
+
+        print("The Final Endpoints: ")
+        print(endpoints)
+        return endpoints
+
+
+# Example usage
+sparql_query = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX pt: <http://www.daml.org/2003/01/periodictable/PeriodicTable.owl#>
+PREFIX OntoKin: <http://www.theworldavatar.com/ontology/ontokin/OntoKin.owl#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?identifier ?atomicMass ?atomicMassUnits
+WHERE {
+    ?element1 rdf:type pt:Element .
+    BIND(STRAFTER(STR(?element1), "#") AS ?identifier)
+    ?element2 rdf:type OntoKin:Element .
+    ?element2 rdfs:label ?identifier1 .
+    ?element2 OntoKin:hasAtomicMass ?atomicMass .
+    ?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits .
+    FILTER(?identifier = ?identifier1)
+}
+"""
+
+# usage
+if __name__ == "__main__":
+    # The index directory may be passed as the first command-line argument.
+    default_location = 'C:/Users/printer_admin/Downloads/KGs/'
+    index_location = sys.argv[1] if len(sys.argv) > 1 else default_location
+
+    ana = AnalyseSparql(sparql_query)
+    ana.set_index_location(index_location)
+    ana.load_indices()
+    ana.get_endpoints()