diff --git a/JPS_BASE_LIB/pom.xml b/JPS_BASE_LIB/pom.xml index 6940528d6e7..8f24020ec84 100644 --- a/JPS_BASE_LIB/pom.xml +++ b/JPS_BASE_LIB/pom.xml @@ -14,9 +14,10 @@ - + 17 + 17 - + uk.ac.cam.cares.jps @@ -43,9 +44,9 @@ maven-compiler-plugin - 1.8 - 1.8 - 8 + 17 + 17 + 17 -Xpkginfo:always @@ -226,11 +227,27 @@ org.apache.httpcomponents httpclient + + org.apache.httpcomponents + httpcore + 4.4.14 + + + org.apache.jena + jena-iri + 4.8.0 + + + org.apache.jena + jena-base + 4.8.0 + org.apache.jena jena-arq + 4.8.0 org.apache.jena @@ -410,7 +427,43 @@ org.glassfish.jersey.core jersey-common + + org.apache.jena + apache-jena-libs + 4.8.0 + pom + + + org.apache.jena + jena-core + 4.8.0 + + + org.apache.jena + jena-tdb + 4.8.0 + + + org.eclipse.rdf4j + rdf4j-bom + 3.0.4 + pom + import + + + + + oss.sonatype.org-snapshot + https://oss.sonatype.org/content/repositories/snapshots + + false + + + true + + + \ No newline at end of file
diff --git a/JPS_BASE_LIB/python_federated_query/README.md b/JPS_BASE_LIB/python_federated_query/README.md new file mode 100644 index 00000000000..7f120135dfc --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/README.md @@ -0,0 +1,54 @@
+# Description #
+
+The `build_kg_index.py` module builds an inverted index of concepts to the files they occur in, given a base directory. It automatically considers files directly under the base directory as well as under its sub-folders, and saves the inverted index in a file called `inverted_index.json`. `load_kg_index.py`, in turn, loads an index from a saved file and produces a bar chart of the concept-to-files association.
+
+# Installation #
+
+You need to install `rdflib` and `matplotlib` for this purpose.
+
+## Virtual environment setup
+
+It is highly recommended to use a virtual environment (https://docs.python.org/3/tutorial/venv.html) for the modules in this directory.
+The virtual environment can be created as follows:
+
+`(Windows)`
+
+```cmd
+$ python -m venv <venv_name>
+$ <venv_name>\Scripts\activate.bat
+(<venv_name>) $
+```
+
+`(Linux)`
+```sh
+$ python3 -m venv <venv_name>
+$ source <venv_name>/bin/activate
+(<venv_name>) $
+```
+
+The above commands will create and activate the virtual environment `<venv_name>` in the current directory.
+
+
+## Installation of required libraries
+
+To install `rdflib` and `matplotlib`, simply run the following command:
+
+```sh
+(<venv_name>) $ pip install rdflib matplotlib
+```
+
+The above command will install the `rdflib` and `matplotlib` packages.
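+
+The scripts in this directory are then run directly with Python. As a quick check that the environment is ready (a sketch — the Blazegraph endpoint URL and the index/output paths are hard-coded in each script's `__main__` block and must be adjusted first):
+
+```sh
+(<venv_name>) $ python build_cp2endpoints_indx.py
+(<venv_name>) $ python load_single_indx.py
+```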
+
+# Requirements #
+
+Python 3 with the `rdflib` and `matplotlib` packages (see Installation above).
+
+# Command line interface usage #
+
+Each module is a standalone script; run it as `python <module>.py` after adjusting the endpoint URLs and index paths in its `__main__` block.
+
+# Authors #
+Md Hanif Seddiqui (mhs62@cam.ac.uk), 23 April 2024 \ No newline at end of file
diff --git a/JPS_BASE_LIB/python_federated_query/analyse_sparql.py b/JPS_BASE_LIB/python_federated_query/analyse_sparql.py new file mode 100644 index 00000000000..2440b7a4997 --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/analyse_sparql.py @@ -0,0 +1,159 @@
+from rdflib.plugins.sparql.parser import parseQuery
+from rdflib.plugins.sparql.algebra import translateQuery
+from rdflib import URIRef, BNode
+import rdflib.plugins.sparql.algebra as algebra
+import json
+
+class AnalyseSparql:
+
+    def __init__(self,sparql_query):
+        self.query_object = translateQuery(parseQuery(sparql_query))
+        self.class_index_file_path=""
+        self.property_index_file_path=""
+        self.cp_index_file_path=""
+        self.classes = set()
+        self.properties = set()
+        self.class_index = {}
+        self.property_index = {}
+        self.cp_index = {}
+
+    def set_index_location(self,index_dir):
+        index_dir=index_dir.strip()
+        if not index_dir.endswith("/"):
+            index_dir=index_dir+"/"
+
+        self.class_index_file_path= index_dir+"cinv.indx"
+        self.property_index_file_path=index_dir+"pinv.indx"
+        self.cp_index_file_path=index_dir+"cpinv.indx"
+
+    def extract_classes_and_properties(self):
+        """Analyses the parsed SPARQL query to extract the classes and properties it mentions.
+
+        Returns:
+            tuple: A tuple containing two sets:
+                * classes (set): A set of URIRefs representing classes.
+                * properties (set): A set of URIRefs representing properties.
+        """
+
+        self.traverse_query_tree(self.query_object.algebra)
+        return self.classes, self.properties
+
+    def traverse_query_tree(self,node):
+        # Safely check for and iterate over triples
+        if hasattr(node, 'triples') and node.triples is not None:
+            for s, p, o in node.triples:
+                if isinstance(s, URIRef):
+                    self.classes.add(s)
+                if isinstance(p, URIRef):
+                    self.properties.add(p)
+                if isinstance(o, URIRef) and o.startswith("http"):
+                    self.classes.add(o) # Heuristic for class as object
+
+        # Recursively traverse child nodes
+        for attr in ['p', 'p1', 'p2', 'expr']: # Common child node attributes
+            if hasattr(node, attr):
+                child = getattr(node, attr)
+                if isinstance(child, list):
+                    for item in child:
+                        self.traverse_query_tree(item)
+                else:
+                    self.traverse_query_tree(child)
+
+    #Load index from file
+    def load_indices(self):
+        self.load_class_index()
+        self.load_property_index()
+        self.load_concept2property_index()
+
+    def load_class_index(self):
+        try:
+            with open(self.class_index_file_path, 'r') as file:
+                self.class_index = json.load(file)
+            print(f"Class Index loaded from {self.class_index_file_path}")
+        except FileNotFoundError:
+            print(f"File '{self.class_index_file_path}' not found.")
+        except json.JSONDecodeError:
+            print(f"Error decoding JSON data from '{self.class_index_file_path}'.")
+
+    def load_property_index(self):
+        try:
+            with open(self.property_index_file_path, 'r') as file:
+                self.property_index = json.load(file)
+            print(f"Property Index loaded from {self.property_index_file_path}")
+        except FileNotFoundError:
+            print(f"File '{self.property_index_file_path}' not found.")
+        except json.JSONDecodeError:
+            print(f"Error decoding JSON data from '{self.property_index_file_path}'.")
+
+    def load_concept2property_index(self):
+        try:
+            # Open the file for reading
+            with open(self.cp_index_file_path, 'r') as file:
+                # Load the JSON data from the file into the index variable
+                self.cp_index = json.load(file)
+            print("Class-Property multilevel inverted index loaded successfully.")
+        except FileNotFoundError:
+            print(f"File '{self.cp_index_file_path}' not found.")
+        except json.JSONDecodeError:
+            print(f"Error decoding JSON data from '{self.cp_index_file_path}'.")
+
+    def get_endpoints(self):
+        endpoints = set()
+        classes, properties = self.extract_classes_and_properties()
+
+        print("Analysing Classes:")
+        for class_uriref in classes:
+            class_uri=str(class_uriref)
+
+            if class_uri in self.class_index:
+                print("Found class alignment: " + class_uri)
+                for endpoint in self.class_index[class_uri]:
+                    endpoints.add(endpoint)
+            else:
+                print("Un-aligned class: "+class_uri)
+
+        print("Analysing Properties:")
+        for property_uriref in properties:
+            property_uri=str(property_uriref)
+            if property_uri in self.property_index:
+                print("Found property alignment: " + property_uri)
+                for endpoint in self.property_index[property_uri]:
+                    endpoints.add(endpoint)
+            else:
+                print("Un-aligned property: "+property_uri)
+
+        print("The Final Endpoints: ")
+        print(endpoints)
+        return endpoints
+
+# Example usage
+sparql_query = """
+PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+PREFIX owl: <http://www.w3.org/2002/07/owl#>
+PREFIX pt: <http://www.daml.org/2003/01/periodictable/PeriodicTable#>
+PREFIX OntoKin: <http://www.theworldavatar.com/ontology/ontokin/OntoKin.owl#>
+PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+SELECT ?identifier ?atomicMass ?atomicMassUnits
+WHERE {
+    ?element1 rdf:type pt:Element .
+    BIND(STRAFTER(STR(?element1), "#") AS ?identifier)
+    ?element2 rdf:type OntoKin:Element .
+    ?element2 rdfs:label ?identifier1 .
+    ?element2 OntoKin:hasAtomicMass ?atomicMass .
+    ?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits .
+    FILTER(?identifier = ?identifier1)
+}
+"""
+
+# usage
+if __name__ == "__main__":
+    index_location='C:/Users/printer_admin/Downloads/KGs/'
+
+    ana = AnalyseSparql(sparql_query)
+    ana.set_index_location(index_location)
+    ana.load_indices()
+    ana.get_endpoints()
+
diff --git a/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2endpoints_indx.py b/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2endpoints_indx.py new file mode 100644 index 00000000000..4a89eb14143 --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2endpoints_indx.py @@ -0,0 +1,172 @@
+from rdflib import Graph, Namespace, URIRef
+from rdflib.plugins.stores.sparqlstore import SPARQLStore
+import os
+import json
+import requests
+from tqdm import tqdm
+
+class BuildKGIndex:
+
+    def __init__(self):
+        self.triplecount=0
+        self.class_index = {}
+        self.graph = Graph()
+
+    def start_processing(self,endpoint_url):
+        sparql_query = """
+        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX owl: <http://www.w3.org/2002/07/owl#>
+        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+        SELECT ?class
+        WHERE {
+            ?subject a ?class .
+ FILTER ( + isIRI(?class) && + ?class != owl:Ontology && + ?class != owl:Class && + ?class != owl:NamedIndividual + ) + } + """ + + try: + # Define the HTTP headers + headers = { + "Accept": "application/sparql-results+json" # Specify the format of the response + } + + # Send the SPARQL query to the endpoint + response = requests.post(endpoint_url, data={"query": sparql_query}, headers=headers) + + + # Check if the request was successful (status code 200) + if response.status_code == 200: + # Parse the JSON response + json_response = response.json() + + progress_bar = tqdm(total=len(json_response["results"]["bindings"]), desc="Processing triples", unit="triple") + # Process the results + for binding in json_response["results"]["bindings"]: + class_uri = binding["class"]["value"] + + # print(f"Class: {class_uri}") + + # Check if class_uri is not empty + if (not class_uri): + continue + + # Check if class_uri already exists in cp_index + if class_uri not in self.class_index: + self.class_index[class_uri] = set() + + if endpoint_url not in self.class_index[class_uri]: + self.class_index[class_uri].add(endpoint_url) + + # self.build_inverted_index(classes,file_path) + # if endpoint_url not in self.class_index[class_uri]: + # self.class_index[class_uri].append(endpoint_url) + + # Update progress bar for each triple processed + progress_bar.update(1) + + progress_bar.close() + else: + print("Error:", response.status_code, response.text) + + # # Query the SPARQL endpoint with the SPARQL query + # self.graph.parse(self.endpoint_url, format='xml') + + # # Execute the SPARQL query and process the results + # concept_property_pair_results = self.graph.query(sparql_query) + # self.build_concept2property_invindex(concept_property_pair_results, self.endpoint_url) + except Exception as e: + print("Error:", e) + return None + + return self.class_index + + # def save_concept2endpoint_invindex(self, file_path): + # with open(file_path, 'w') as f: + # json.dump(self.class_index, f, indent=4) + def get_index(self): + return self.class_index + + def add_triples_and_update_index(self, new_triples, endpoint_url): + modified_classes = set() + + for s, p, o in new_triples: + self.graph.add((s, p, o)) + + if p == URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"): + class_uri = str(o) + + if class_uri not in self.class_index: + self.class_index[class_uri] = set() + + self.class_index[class_uri].add(endpoint_url) + modified_classes.add(class_uri) + + self.triplecount += len(new_triples) + print(f"Added {len(new_triples)} triples and updated the index.") + + return modified_classes + + def append_index(self, file_path, modified_classes): + if os.path.exists(file_path): + with open(file_path, 'r') as file: + current_index = json.load(file) + else: + current_index = {} + + for class_uri in modified_classes: + if class_uri not in current_index: + current_index[class_uri] = list(self.class_index[class_uri]) + else: + current_index[class_uri] = list(set(current_index[class_uri]) | self.class_index[class_uri]) + + with open(file_path, 'w') as file: + json.dump(current_index, file) + + print(f"Index appended to {file_path}") + + def save_index(self, file_path): + current_index = {} + + for class_uri in self.class_index: + if class_uri not in current_index: + current_index[class_uri] = list(self.class_index[class_uri]) + else: + current_index[class_uri] = list(set(current_index[class_uri]) | self.class_index[class_uri]) + + with open(file_path, 'w') as file: + json.dump(current_index, file) + + def load_index(self, 
file_path): + if os.path.exists(file_path): + with open(file_path, 'r') as file: + self.class_index = {k: set(v) for k, v in json.load(file).items()} + print(f"Index loaded from {file_path}") + else: + print(f"No index file found at {file_path}") +# usage +if __name__ == "__main__": + # Define the Blazegraph base URL + blazegraph_base_url = "http://localhost:8080/blazegraph" + + # Define the namespace + namespaces = ["namespace_kin","namespace_compchem"] + + kgs = BuildKGIndex() + + for i in range(len(namespaces)): + # Construct the SPARQL endpoint URL for the specified namespace + endpoint_url = f"{blazegraph_base_url}/namespace/{namespaces[i]}/sparql" + print(f"Start processing endpoint: {endpoint_url}") + kgs.start_processing(endpoint_url) + + # current_index=kgs.get_index() + saved_datafile='C:/Users/printer_admin/Downloads/KGs/c2ep_invindex.json' + kgs.save_index(saved_datafile) + print(f"Completed and saved in {saved_datafile}.") diff --git a/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2file_indx.py b/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2file_indx.py new file mode 100644 index 00000000000..9449963f129 --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/backup_extra/build_c2file_indx.py @@ -0,0 +1,66 @@ +from rdflib import Graph +import os +import json + +class BuildKGIndex: + + def __init__(self, directory): + self.filecount=0 + self.triplecount=0 + self.class_index = {} + self.directory=directory + + def start_processing(self): + for root, dirs, files in os.walk(self.directory): #walks along files and sub-folders + for filename in files: + if filename.endswith(".owl"): # Assuming files are owl files + file_path = os.path.join(root, filename) + self.filecount+=1 + print(f"{self.filecount}. Processing: {file_path}") + classes=self.extract_concepts_from_owl_file(file_path) + self.build_inverted_index(classes,file_path) + return self.class_index + + def extract_concepts_from_owl_file(self,owl_file): + graph = Graph() + graph.parse(owl_file, format="xml") # Adjust format if needed + rdf_type = '#type' + triple_counter = 0 + classes = set() + # Extract classes (concepts) + for subject, predicate, obj in graph: #triples in graph + if predicate.endswith(rdf_type) and not obj.endswith(rdf_type): + classes.add(obj) + triple_counter+=1 + print(f"Processed {triple_counter} triples from file {owl_file}") + self.triplecount+=triple_counter + return classes + + def build_inverted_index(self,classes,filename): + for class_name in classes: + if class_name not in self.class_index: + self.class_index[class_name] = [] + self.class_index[class_name].append(filename) + + def save_inverted_index(self, inverted_index, file_path): + with open(file_path, 'w') as f: + json.dump(inverted_index, f, indent=4) + + def get_triplecount(self): + return self.triplecount + +# usage +if __name__ == "__main__": + directory = "C:/Users/printer_admin/Downloads/KGs/" + kgs = BuildKGIndex(directory) + kg_index=kgs.start_processing() + kgs.save_inverted_index(kg_index, os.path.join(directory, 'inverted_index.json')) + total_triple_processed=kgs.get_triplecount() + print(f"Completed. 
Total Number of Processed Triples: {total_triple_processed}")
+
+    # for class_name, related_files in kg_index.items():
+    #     print(f"Class: {class_name}")
+    #     print(f"Related Files: {related_files}")
+    #     print()
+ \ No newline at end of file
diff --git a/JPS_BASE_LIB/python_federated_query/backup_extra/build_cp2file_indx.py b/JPS_BASE_LIB/python_federated_query/backup_extra/build_cp2file_indx.py new file mode 100644 index 00000000000..c7fa1bb64f3 --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/backup_extra/build_cp2file_indx.py @@ -0,0 +1,102 @@
+from rdflib import Graph
+import os
+import json
+import warnings
+
+class BuildKGIndex:
+
+    def __init__(self, directory):
+        self.filecount=0
+        self.triplecount=0
+        self.class_index = {}
+        self.cp_index = {}
+        self.directory=directory
+        self.graph=Graph()
+        warnings.filterwarnings("ignore", message=".*does not look like a valid URI.*")
+
+    def __del__(self):
+        warnings.resetwarnings()
+
+    def start_processing(self):
+        sparql_query = """
+        SELECT ?class ?property
+        WHERE {
+            ?subject ?property ?object .
+            ?subject a ?class .
+            FILTER (
+                isIRI(?class) && ?class != owl:Ontology && ?class != owl:Class && ?class != owl:NamedIndividual &&
+                !isBlank(?property) && isIRI(?property) && ?property != rdf:type && ?property != rdfs:label
+            )
+        }
+        """
+
+        for root, dirs, files in os.walk(self.directory): #walks along files and sub-folders
+            for filename in files:
+                if filename.endswith(".owl"): # Assuming files are owl files
+                    file_path = os.path.join(root, filename)
+                    self.filecount+=1
+                    print(f"{self.filecount}. Processing: {file_path}")
+                    self.create_graph_from_owl_file(file_path)
+                    concept_property_pair_results=self.get_sparql_result(sparql_query)
+                    self.build_concept2property_invindex(concept_property_pair_results, file_path)
+
+        return self.class_index
+
+    def create_graph_from_owl_file(self,owl_file):
+        self.graph = Graph()
+        self.graph.parse(owl_file, format="xml") # Adjust format if needed
+
+    def get_sparql_result(self, sparql_query):
+        return self.graph.query(sparql_query)
+
+    def extract_concepts_property_pair(self, concept_property_pair_results):
+        for row in concept_property_pair_results:
+            class_uri = row["class"]
+            property_uri = row["property"]
+            print(f"Class: {class_uri}, Property: {property_uri}")
+
+    def build_concept2property_invindex(self, concept_property_pair_results, filename):
+        for cp_pair in concept_property_pair_results:
+            class_uri = cp_pair["class"]
+            property_uri = cp_pair["property"]
+
+            # Check if class_uri and property_uri are not empty
+            if not class_uri or not property_uri:
+                continue
+
+            # Check if class_uri already exists in cp_index
+            if class_uri not in self.cp_index:
+                self.cp_index[class_uri] = {}
+
+            # Check if property_uri already exists for class_uri
+            if property_uri not in self.cp_index[class_uri]:
+                self.cp_index[class_uri][property_uri] = []
+
+            # Append filename to the list of filenames for property_uri
+            if filename not in self.cp_index[class_uri][property_uri]:
+                self.cp_index[class_uri][property_uri].append(filename)
+
+    def save_concept2property_invindex(self, file_path):
+        with open(file_path, 'w') as f:
+            json.dump(self.cp_index, f, indent=4)
+
+# usage
+if __name__ == "__main__":
+    directory = "C:/Users/printer_admin/Downloads/KGs/"
+    kgs = BuildKGIndex(directory)
+    kg_index=kgs.start_processing()
+    kgs.save_concept2property_invindex(os.path.join(directory, 'cp_invindex.json'))
+    print("Completed.")
+
+    # for class_name, related_files in kg_index.items():
+    #     print(f"Class: {class_name}")
+    #     print(f"Related Files: {related_files}")
+    #     print()
+ \ No newline at end of file
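For illustration, the class-property-to-file index that `build_cp2file_indx.py` writes is a two-level JSON map from class IRI to property IRI to the list of files in which the pair occurs. With hypothetical IRIs and file paths, `cp_invindex.json` would look roughly like this:

```json
{
  "http://example.org/ontology#Element": {
    "http://example.org/ontology#hasAtomicMass": [
      "C:/Users/printer_admin/Downloads/KGs/file_1.owl",
      "C:/Users/printer_admin/Downloads/KGs/subfolder/file_2.owl"
    ]
  }
}
```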
{class_name}") + # print(f"Related Files: {related_files}") + # print() + + \ No newline at end of file diff --git a/JPS_BASE_LIB/python_federated_query/backup_extra/build_p2endpoints_indx.py b/JPS_BASE_LIB/python_federated_query/backup_extra/build_p2endpoints_indx.py new file mode 100644 index 00000000000..2d8bc0a8d2b --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/backup_extra/build_p2endpoints_indx.py @@ -0,0 +1,115 @@ +from rdflib import Graph, Namespace +from rdflib.plugins.stores.sparqlstore import SPARQLStore +import os +import json +import requests +from tqdm import tqdm + +class BuildP2EIndex: + + def __init__(self): + self.triplecount=0 + self.property_index = {} + self.graph = Graph() + + + def start_processing(self,endpoint_url): + sparql_query = """ + PREFIX rdf: + PREFIX owl: + PREFIX rdfs: + + SELECT ?property + WHERE { + ?subject ?property ?object . + FILTER ( + !isBlank(?property) && + isIRI(?property) && + ?property != rdf:type && + ?property != rdfs:label + ) + } + """ + + try: + # Define the HTTP headers + headers = { + "Accept": "application/sparql-results+json" # Specify the format of the response + } + + # Send the SPARQL query to the endpoint + response = requests.post(endpoint_url, data={"query": sparql_query}, headers=headers) + + + # Check if the request was successful (status code 200) + if response.status_code == 200: + # Parse the JSON response + json_response = response.json() + + progress_bar = tqdm(total=len(json_response["results"]["bindings"]), desc="Processing triples", unit="triple") + # Process the results + for binding in json_response["results"]["bindings"]: + property_uri = binding["property"]["value"] + + # print(f"property: {property_uri}") + + # Check if property_uri is not empty + if (not property_uri): + continue + + # Check if property_uri already exists in cp_index + if property_uri not in self.property_index: + self.property_index[property_uri] = [] + + if endpoint_url not in self.property_index[property_uri]: + self.property_index[property_uri].append(endpoint_url) + + # self.build_inverted_index(propertyes,file_path) + # if endpoint_url not in self.property_index[property_uri]: + # self.property_index[property_uri].append(endpoint_url) + + # Update progress bar for each triple processed + progress_bar.update(1) + + progress_bar.close() + else: + print("Error:", response.status_code, response.text) + + # # Query the SPARQL endpoint with the SPARQL query + # self.graph.parse(self.endpoint_url, format='xml') + + # # Execute the SPARQL query and process the results + # property_property_pair_results = self.graph.query(sparql_query) + # self.build_property2property_invindex(property_property_pair_results, self.endpoint_url) + except Exception as e: + print("Error:", e) + return None + + return self.property_index + + def get_sparql_result(self, sparql_query): + return self.graph.query(sparql_query) + + def save_property2endpoint_invindex(self, file_path): + with open(file_path, 'w') as f: + json.dump(self.property_index, f, indent=4) + +# usage +if __name__ == "__main__": + # Define the Blazegraph base URL + blazegraph_base_url = "http://localhost:8080/blazegraph" + + # Define the namespace + namespaces = ["namespace_1","namespace_2"] + + kgs = BuildP2EIndex() + + for i in range(len(namespaces)): + # Construct the SPARQL endpoint URL for the specified namespace + endpoint_url = f"{blazegraph_base_url}/namespace/{namespaces[i]}/sparql" + print(f"Start processing endpoint: {endpoint_url}") + kgs.start_processing(endpoint_url) + + 
+    saved_datafile='C:/Users/printer_admin/Downloads/KGs/p2e_invindex.json'
+    kgs.save_property2endpoint_invindex(saved_datafile)
+    print(f"Completed and saved in {saved_datafile}.")
diff --git a/JPS_BASE_LIB/python_federated_query/build_cp2endpoints_indx.py b/JPS_BASE_LIB/python_federated_query/build_cp2endpoints_indx.py new file mode 100644 index 00000000000..19e1e2ccaed --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/build_cp2endpoints_indx.py @@ -0,0 +1,292 @@
+from rdflib import Graph, Namespace, URIRef
+from rdflib.plugins.stores.sparqlstore import SPARQLStore
+import os
+import json
+import requests
+from tqdm import tqdm
+
+class BuildKGIndex:
+
+    def __init__(self):
+        self.class_index_file_path=""
+        self.property_index_file_path=""
+        self.cp_index_file_path=""
+
+        self.triplecount=0
+        self.class_index = {}
+        self.property_index = {}
+        self.cp_index = {}
+        self.graph = Graph()
+
+    def set_index_location(self,index_dir):
+        index_dir=index_dir.strip()
+        if not index_dir.endswith("/"):
+            index_dir=index_dir+"/"
+
+        self.class_index_file_path= index_dir+"cinv.indx"
+        self.property_index_file_path=index_dir+"pinv.indx"
+        self.cp_index_file_path=index_dir+"cpinv.indx"
+
+    def start_processing(self,endpoint_url):
+        sparql_query = """
+        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        PREFIX owl: <http://www.w3.org/2002/07/owl#>
+        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+        SELECT ?class ?property
+        WHERE {
+            {
+                ?subject ?property ?object .
+                ?subject a ?class .
+                FILTER (
+                    isIRI(?class) && ?class != owl:Ontology && ?class != owl:Class && ?class != owl:NamedIndividual &&
+                    !isBlank(?property) && isIRI(?property) && ?property != rdf:type && ?property != rdfs:label && ?property != rdfs:comment
+                )
+            }
+            UNION
+            {
+                ?subject ?property ?object .
+                ?object a ?class .
+                FILTER (
+                    isIRI(?class) && ?class != owl:Ontology && ?class != owl:Class && ?class != owl:NamedIndividual &&
+                    !isBlank(?property) && isIRI(?property) && ?property != rdf:type && ?property != rdfs:label && ?property != rdfs:comment
+                )
+            }
+        }
+        """
+
+        try:
+            # Define the HTTP headers
+            headers = {
+                "Accept": "application/sparql-results+json" # Specify the format of the response
+            }
+
+            # Send the SPARQL query to the endpoint
+            response = requests.post(endpoint_url, data={"query": sparql_query}, headers=headers)
+
+            # Check if the request was successful (status code 200)
+            if response.status_code == 200:
+                # Parse the JSON response
+                json_response = response.json()
+
+                progress_bar = tqdm(total=len(json_response["results"]["bindings"]), desc="Processing triples", unit="triple")
+                # Process the results
+                for binding in json_response["results"]["bindings"]:
+                    class_uri = binding["class"]["value"]
+                    property_uri = binding["property"]["value"]
+
+                    # Creating Class-to-endpoint inverted index
+                    if (class_uri):
+                        if class_uri not in self.class_index:
+                            self.class_index[class_uri] = []
+
+                        if endpoint_url not in self.class_index[class_uri]:
+                            self.class_index[class_uri].append(endpoint_url)
+
+                    # Creating Property-to-endpoint inverted index
+                    if (property_uri):
+                        if property_uri not in self.property_index:
+                            self.property_index[property_uri] = []
+
+                        if endpoint_url not in self.property_index[property_uri]:
+                            self.property_index[property_uri].append(endpoint_url)
+
+                    # Creating Class-Property-to-endpoint inverted index
+                    if ((not class_uri) or (not property_uri)):
+                        continue
+
+                    if class_uri not in self.cp_index:
+                        self.cp_index[class_uri] = {}
+
+                    # Check if property_uri already exists for class_uri
+                    if property_uri not in self.cp_index[class_uri]:
+
self.cp_index[class_uri][property_uri] = [] + + # Append filename to the list of filenames for property_uri + if endpoint_url not in self.cp_index[class_uri][property_uri]: + self.cp_index[class_uri][property_uri].append(endpoint_url) + + # Update progress bar for each triple processed + progress_bar.update(1) + + progress_bar.close() + else: + print("Error:", response.status_code, response.text) + + # # Query the SPARQL endpoint with the SPARQL query + # self.graph.parse(self.endpoint_url, format='xml') + except Exception as e: + print("Error:", e) + return None + + return self.class_index, self.property_index, self.cp_index + + #get indices + def get_c2e_index(self): + return self.class_index + + def get_p2e_index(self): + return self.property_index + + def get_cp2e_index(self): + return self.cp_index + + #update index on new triples insertion + def add_triples_and_update_index(self, new_triples, endpoint_url): + #modified_classes = [] + #modified_properties = [] + #modified_cp = set() + + for s, p, o in new_triples: + self.graph.add((s, p, o)) + + if p == URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"): + class_uri = str(o) + last_token=self.get_substring_from_last(class_uri) + if(last_token=="Ontology" or last_token=="Class" or last_token=="NamedIndividual"): + continue + + if class_uri not in self.class_index: + self.class_index[class_uri] = [] + + self.class_index[class_uri].append(endpoint_url) + #if class_uri not in modified_classes: + # modified_classes.append(class_uri) + else: + property_uri = str(p) + last_token=self.get_substring_from_last(property_uri) + if(last_token=="label"): + continue + + if property_uri not in self.property_index: + self.property_index[property_uri] = [] + + self.property_index[property_uri].append(endpoint_url) + #if property_uri not in modified_properties: + # modified_properties.append(property_uri) + + for s, p, o in new_triples: + if(str(p) in self.property_index): + property_uri=str(p) + if (str(s) in self.class_index): + class_uri=str(s) + + if class_uri not in self.cp_index: + self.cp_index[class_uri] = {} + if property_uri not in self.cp_index[class_uri]: + self.cp_index[class_uri][property_uri] = [] + if endpoint_url not in self.cp_index[class_uri][property_uri]: + self.cp_index[class_uri][property_uri].append(endpoint_url) + + if(str(o) in self.class_index): + class_uri=str(o) + + if class_uri not in self.cp_index: + self.cp_index[class_uri] = {} + if property_uri not in self.cp_index[class_uri]: + self.cp_index[class_uri][property_uri] = [] + if endpoint_url not in self.cp_index[class_uri][property_uri]: + self.cp_index[class_uri][property_uri].append(endpoint_url) + + self.triplecount += len(new_triples) + print(f"Added {len(new_triples)} triples and updated the index.") + + #return modified_classes + + #Load index from file + def load_indices(self): + self.load_class_index() + self.load_property_index() + self.load_concept2property_index() + + def load_class_index(self): + try: + with open(self.class_index_file_path, 'r') as file: + self.class_index = json.load(file) + print(f"Class Index loaded from {self.class_index_file_path}") + except FileNotFoundError: + print(f"File '{self.class_index_file_path}' not found.") + except json.JSONDecodeError: + print(f"Error decoding JSON data from '{self.class_index_file_path}'.") + + def load_property_index(self): + try: + with open(self.property_index_file_path, 'r') as file: + self.property_index = json.load(file) + print(f"Property Index loaded from {self.property_index_file_path}") + 
except FileNotFoundError: + print(f"File '{self.property_index_file_path}' not found.") + except json.JSONDecodeError: + print(f"Error decoding JSON data from '{self.property_index_file_path}'.") + + def load_concept2property_index(self): + try: + # Open the file for reading + with open(self.cp_index_file_path, 'r') as file: + # Load the JSON data from the file into the index variable + self.cp_index = json.load(file) + print("Class-Property multilevel inverted index loaded successfully.") + except FileNotFoundError: + print(f"File '{self.cp_index_file_path}' not found.") + except json.JSONDecodeError: + print(f"Error decoding JSON data from '{self.cp_index_file_path}'.") + + #Save index file + def save_indices(self): + self.save_class_index() + self.save_property_index() + self.save_cp_index() + + def save_class_index(self): + with open(self.class_index_file_path, 'w') as file: + json.dump(self.class_index, file, indent=4) + + def save_property_index(self): + with open(self.property_index_file_path, 'w') as file: + json.dump(self.property_index, file, indent=4) + + def save_cp_index(self): + with open(self.cp_index_file_path, 'w') as file: + json.dump(self.cp_index, file, indent=4) + + def get_substring_from_last(self,string): + return string.rsplit('/', 1)[-1].rsplit('#', 1)[-1] + +# usage +if __name__ == "__main__": + # Define the Blazegraph base URL + blazegraph_base_url = "http://localhost:8080/blazegraph" + + # Define the namespace + #namespaces = ["namespace_kin","namespace_compchem","namespace_uken"] + namespaces = ["namespace_all"] + index_location='C:/Users/printer_admin/Downloads/KGs/tests' + + kgs = BuildKGIndex() + kgs.set_index_location(index_location) + + for i in range(len(namespaces)): + # Construct the SPARQL endpoint URL for the specified namespace + endpoint_url = f"{blazegraph_base_url}/namespace/{namespaces[i]}/sparql" + print(f"Start processing endpoint: {endpoint_url}") + kgs.start_processing(endpoint_url) + + kgs.save_indices() + print(f"Completed and index-files are saved in {index_location}.") diff --git a/JPS_BASE_LIB/python_federated_query/load_double_indx.py b/JPS_BASE_LIB/python_federated_query/load_double_indx.py new file mode 100644 index 00000000000..930d7a63c2a --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/load_double_indx.py @@ -0,0 +1,91 @@ +import json +import matplotlib.pyplot as plt + +class LoadKGIndex: + def __init__(self): + self.filecount=0 + self.triplecount=0 + self.cp_index = {} + self.cp_name=[] + self.cp_files=[] + + def load_cp_index(self, file_path): + with open(file_path, 'r') as f: + self.cp_index = json.load(f) + return self.cp_index + + def load_concept2property_invindex(self, file_path): + try: + # Open the file for reading + with open(file_path, 'r') as file: + # Load the JSON data from the file into the index variable + self.cp_index = json.load(file) + print("Class-Property multilevel inverted index loaded successfully.") + except FileNotFoundError: + print(f"File '{file_path}' not found.") + except json.JSONDecodeError: + print(f"Error decoding JSON data from '{file_path}'.") + + def get_cp_index(self): + return self.cp_index + + def search_concept(self, class_name): + if class_name in self.cp_index: + return self.cp_index[class_name] + else: + return None + + def analyse(self): + for class_name, prop_dict in self.cp_index.items(): + for prop, files in prop_dict.items(): + class_name_short = self.get_substring_from_last(class_name) + prop_name_short = self.get_substring_from_last(prop) + key = 
f"{class_name_short}-{prop_name_short}" + self.cp_name.append(key) + self.cp_files.append(len(files)) + + def print_key_value_stats(self): + cp_count = 0 + file_count = 0 + avg_files_per_cp = 0.0 + + print(f"Total number of class-property key: {len(self.cp_name)}") + for i in range(len(self.cp_name)): + file_count += self.cp_files[i] + cp_count += 1 + print(f"{self.cp_name[i]} : {self.cp_files[i]}") + + avg_files_per_cp = file_count/cp_count + print(f"Total class-property:{cp_count}; Average files per class-property: {avg_files_per_cp}") + + def get_substring_from_last(self,string): + return string.rsplit('/', 1)[-1].rsplit('#', 1)[-1] + + def bar_plt(self): + # Create bar chart + plt.figure(figsize=(8, 6)) + plt.bar(self.cp_name, self.cp_files, color='skyblue') + + # Add title and labels + plt.title('`Class-property` vs `number of file` distribution') + plt.xlabel('Class-property') + plt.ylabel('File Frequency') + + # Show plot + plt.xticks(rotation=45) + plt.tight_layout() + plt.show() + +# usage +if __name__ == "__main__": + #index_file = "C:/Users/printer_admin/Downloads/KGs/ontokin/cp_index_170.json" + #index_file = "C:/Users/printer_admin/Downloads/KGs/cp_ep_invindex.json" + index_file = "C:/Users/printer_admin/Downloads/KGs/cpinv.indx" + kgs = LoadKGIndex() + + index=kgs.load_cp_index(index_file) + # files=kgs.search_concept("http://theworldavatar.com/kb/ontokin/ontokin.owl#Element") + kgs.analyse() + kgs.print_key_value_stats() + kgs.bar_plt() + \ No newline at end of file diff --git a/JPS_BASE_LIB/python_federated_query/load_single_indx.py b/JPS_BASE_LIB/python_federated_query/load_single_indx.py new file mode 100644 index 00000000000..4a62f5d688e --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/load_single_indx.py @@ -0,0 +1,71 @@ +import json +import matplotlib.pyplot as plt + +class LoadKGIndex: + def __init__(self): + self.inverted_index = {} + self.key_names=[] + self.key_endpoints=[] + + def load_inverted_index(self, file_path): + with open(file_path, 'r') as f: + self.inverted_index = json.load(f) + return self.inverted_index + + def get_inverted_index(self): + return self.inverted_index + + def search_concept(self, key_name): + if key_name in self.inverted_index: + return self.inverted_index[key_name] + else: + return None + + def analyse(self): + for key_name, endpoints in self.inverted_index.items(): + self.key_names.append(self.get_substring_from_last(key_name)) + self.key_endpoints.append(len(endpoints)) + + def get_substring_from_last(self,string): + return string.rsplit('/', 1)[-1].rsplit('#', 1)[-1] + + def print_key_value_stats(self): + key_count = 0 + endpoint_count = 0 + avg_endpoints_per_key = 0.0 + + print(f"Total number of keys: {len(self.key_names)}") + for i in range(len(self.key_names)): + endpoint_count += self.key_endpoints[i] + key_count += 1 + print(f"{self.key_names[i]} : {self.key_endpoints[i]}") + + avg_endpoints_per_key = endpoint_count/key_count + print(f"Total keys:{key_count}; Average endpoints per key: {avg_endpoints_per_key}") + + def bar_plt(self): + # Create bar chart + plt.figure(figsize=(8, 6)) + plt.bar(self.key_names, self.key_endpoints, color='skyblue') + + # Add title and labels + plt.title('Key-to-endpoints distribution') + plt.xlabel('Key') + plt.ylabel('Key Frequency') + + # Show plot + plt.xticks(rotation=45) + plt.tight_layout() + plt.show() + +# usage +if __name__ == "__main__": + #index_file = "C:/Users/printer_admin/Downloads/KGs/ontokin/inverted_index_170.json" + index_file = 
"C:/Users/printer_admin/Downloads/KGs/p2e_invindex.json" + kgs = LoadKGIndex() + + index=kgs.load_inverted_index(index_file) + # files=kgs.search_concept("http://theworldavatar.com/kb/ontokin/ontokin.owl#Element") + kgs.analyse() + kgs.print_key_value_stats() + kgs.bar_plt() diff --git a/JPS_BASE_LIB/python_federated_query/process_sparql.py b/JPS_BASE_LIB/python_federated_query/process_sparql.py new file mode 100644 index 00000000000..d050df54e2d --- /dev/null +++ b/JPS_BASE_LIB/python_federated_query/process_sparql.py @@ -0,0 +1,123 @@ +from rdflib import Graph, Namespace +from rdflib.plugins.stores.sparqlstore import SPARQLStore +import os +import json +import requests +from tqdm import tqdm +from SPARQLWrapper import SPARQLWrapper, JSON, CSV + +class ProcessSparql: + + def __init__(self): + self.triplecount=0 + + def run_sparql_json(self,base_url, namespace, sparql_query): + # Construct the full URL to the Blazegraph namespace + full_url = f"{base_url}/namespace/{namespace}/sparql" + + # Initialize the SPARQLWrapper with the full URL + sparql = SPARQLWrapper(full_url) + + # Set the query and return format + sparql.setQuery(sparql_query) + sparql.setReturnFormat(JSON) + + try: + # Execute the query and return the results + results = sparql.query().convert() + return results + except Exception as e: + print(f"An error occurred: {e}") + return None + + def run_sparql_csv(self,base_url, namespace, sparql_query): + # Construct the full URL to the Blazegraph namespace + full_url = f"{base_url}/namespace/{namespace}/sparql" + + # Initialize the SPARQLWrapper + # with the full URL + sparql = SPARQLWrapper(full_url) + + # Set the query and return format + sparql.setQuery(sparql_query) + sparql.setReturnFormat(JSON) + + try: + # Execute the query and return the results + results = sparql.query().convert() + return results + except Exception as e: + print(f"An error occurred: {e}") + return None + + +# usage +if __name__ == "__main__": + # Define the Blazegraph server's base url + base_url = "http://localhost:8080/blazegraph" + namespace = "namespace_all" + # sparql_query = """ + # PREFIX rdf: + # PREFIX owl: + # PREFIX rdfs: + + # SELECT ?class ?property + # WHERE { + # ?subject ?property ?object . + # ?subject a ?class . + # FILTER ( + # isIRI(?class) && + # ?class != owl:Ontology && + # ?class != owl:Class && + # ?class != owl:NamedIndividual && + # !isBlank(?property) && + # isIRI(?property) && + # ?property != rdf:type && + # ?property != rdfs:label + # ) + # } + # """ + sparql_query=""" + PREFIX rdf: + PREFIX owl: + PREFIX pt: + PREFIX OntoKin: + PREFIX rdfs: + + SELECT ?identifier ?atomicMass ?atomicMassUnits + WHERE { + ?element1 rdf:type pt:Element . + BIND(STRAFTER(STR(?element1), "#") AS ?identifier) + ?element2 rdf:type OntoKin:Element . + ?element2 rdfs:label ?identifier1 . + ?element2 OntoKin:hasAtomicMass ?atomicMass . + ?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits . 
+ FILTER(?identifier = ?identifier1) + } + """ + + proc = ProcessSparql() + results=proc.run_sparql_json(base_url, namespace, sparql_query) + + triples=[] + if results: + progress_bar = tqdm(total=len(results["results"]["bindings"]), desc="Processing triples", unit="triple") + # Process the results + for result in results["results"]["bindings"]: + atuple=[] + for var in result: + atuple.append(result[var]['value']) + + # Update progress bar for each triple processed + progress_bar.update(1) + + if atuple not in triples: + triples.append(atuple) + + progress_bar.close() + + print(len(triples)) + for i in range(len(triples)): + atuple=triples[i] + for j in range(len(atuple)): + print(f"{atuple[j]}") diff --git a/JPS_BASE_LIB/python_tif_csv/csv2tif.py b/JPS_BASE_LIB/python_tif_csv/csv2tif.py new file mode 100644 index 00000000000..1995daf671c --- /dev/null +++ b/JPS_BASE_LIB/python_tif_csv/csv2tif.py @@ -0,0 +1,77 @@ +import rasterio +import numpy as np +import pandas as pd +from tqdm import tqdm +import re + +metadata_file = "C:/Users/printer_admin/Downloads/tz_metadata.txt" + +# Load CSV +print("Loading CSV ... ") +df = pd.read_csv('C:/Users/printer_admin/Downloads/satz_01.csv') +print("Completed loading CSV") + +# Ensure required columns exist +required_columns = {"longitude", "latitude", "value"} +if not required_columns.issubset(df.columns): + raise ValueError(f"Missing required columns in CSV: {df.columns}") + +# Read metadata +with open(metadata_file, "r") as meta_file: + metadata = meta_file.readlines() + +# Extract transform values safely +transform_line = metadata[1].strip() +transform_line = re.search(r"\[(.*?)\]", transform_line) +if transform_line: + transform_line=transform_line.group(1) + transform_values = [float(num) for num in transform_line.split(",")] +else: + raise ValueError("Transform data not found in metadata file.") +transform = rasterio.transform.Affine(*transform_values) + +crs_wkt = metadata[0].split("CRS: ")[1].strip() +nodata_value = float(0) +print("Completed analysing metadata") + +print("Processing pixel array ... 
") +# Extract unique x and y coordinates +x_coords = np.sort(df["longitude"].unique()) +y_coords = np.sort(df["latitude"].unique())[::-1] # Reverse for raster order + +# Create a 2D array for pixel values +pixel_values = np.full((len(y_coords), len(x_coords)), nodata_value, dtype=np.int16) +print("Completed processing pixel array") + +# Create mappings for fast lookup +x_map = {v: i for i, v in enumerate(x_coords)} +y_map = {v: i for i, v in enumerate(y_coords)} + +# Drop NaN rows (prevents index errors) +df = df.dropna(subset=["longitude", "latitude", "value"]) + +# Convert coordinates to indices +df["x_index"] = df["longitude"].map(x_map) +df["y_index"] = df["latitude"].map(y_map) + +# Assign values to pixel array +pixel_values[df["y_index"], df["x_index"]] = df["value"].astype(np.int16) + +# Define raster metadata +profile = { + "driver": "GTiff", + "dtype": "int16", + "nodata": nodata_value, + "width": len(x_coords), + "height": len(y_coords), + "count": 1, + "crs": crs_wkt, + "transform": transform +} + +# Write TIFF +output_tif = 'C:/Users/printer_admin/Downloads/out_satz_01.tif' +with rasterio.open(output_tif, 'w', **profile) as dst: + dst.write(pixel_values, 1) + +print(f"Raster saved successfully at {output_tif}") diff --git a/JPS_BASE_LIB/python_tif_csv/tif2csv.py b/JPS_BASE_LIB/python_tif_csv/tif2csv.py new file mode 100644 index 00000000000..0b19f5fabc5 --- /dev/null +++ b/JPS_BASE_LIB/python_tif_csv/tif2csv.py @@ -0,0 +1,47 @@ +import rasterio +import csv +import numpy as np +from tqdm import tqdm + +# Input GeoTIFF file +tiff_file = "C:/Users/printer_admin/Downloads/small_area.tif" +csv_file = "C:/Users/printer_admin/Downloads/satz_01.csv" + +# Open the raster file +with rasterio.open(tiff_file) as src: + band = src.read(1) # Read first band (grayscale values) + transform = src.transform # Affine transformation + nodata_value = src.nodata # NoData value + crs = src.crs # Coordinate Reference System + + # Open CSV file for writing + with open(csv_file, mode="w", newline="") as file: + writer = csv.writer(file) + writer.writerow(["longitude", "latitude", "value"]) # CSV Header + + # Use progress bar + progress_bar = tqdm(total=band.shape[0], desc="Processing", unit="rows") + + # Convert pixel indices to geographic coordinates + for row in range(band.shape[0]): + for col in range(band.shape[1]): + x, y = transform * (col, row) # Convert pixel to geographic coordinates + value = band[row, col] + + # Preserve NoData value + if value == nodata_value: + continue # Skip NoData values (optional) + + writer.writerow([x, y, value]) + progress_bar.update(1) + +transform_list = list(transform) +# Save metadata separately +metadata_file = "C:/Users/printer_admin/Downloads/tz_metadata.txt" +with open(metadata_file, "w") as meta_file: + meta_file.write(f"CRS: {crs.to_wkt()}\n") + meta_file.write(f"Transform: {transform_list}\n") + meta_file.write(f"NoData: {nodata_value}\n") + +print(f"✅ TIFF converted to CSV: {csv_file}") +print(f"📜 Metadata saved to: {metadata_file}") diff --git a/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java new file mode 100644 index 00000000000..bf05ef0d7d3 --- /dev/null +++ b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java @@ -0,0 +1,368 @@ +package uk.ac.cam.cares.jps.base.query.fedq; + +import com.google.gson.*; +import com.google.gson.reflect.TypeToken; +import org.apache.jena.graph.Triple; +import 
diff --git a/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java new file mode 100644 index 00000000000..bf05ef0d7d3 --- /dev/null +++ b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/BuildInvertedIndex.java @@ -0,0 +1,368 @@
+package uk.ac.cam.cares.jps.base.query.fedq;
+
+import com.google.gson.*;
+import com.google.gson.reflect.TypeToken;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.query.Query;
+import org.apache.jena.query.QueryExecution;
+import org.apache.jena.query.QueryFactory;
+import org.apache.jena.query.QuerySolution;
+import org.apache.jena.rdf.model.*;
+import org.apache.jena.sparql.exec.http.QueryExecutionHTTP;
+import org.apache.jena.vocabulary.RDF;
+import org.apache.jena.query.ResultSet;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * The class wraps the following functionality:
+ * 1. create inverted indices over a number of endpoints:
+ *    a) class-to-endpoint inverted index
+ *    b) property-to-endpoint inverted index
+ *    c) class-to-property-to-endpoint inverted index
+ * 2. update the inverted indices on insertion of a new set of triples into an endpoint
+ * 3. load an inverted index from file into an in-memory map
+ * 4. save an inverted index from the in-memory map to a file
+ */
+
+public class BuildInvertedIndex {
+    private String classIndexFilePath = "";
+    private String propertyIndexFilePath = "";
+    private String cpIndexFilePath = "";
+
+    // private int tripleCount = 0;
+    private HashSet<String> stop_classes = new HashSet<>();
+    private HashSet<String> stop_properties = new HashSet<>();
+    private Map<String, Set<String>> classIndex = new HashMap<>();
+    private Map<String, Set<String>> propertyIndex = new HashMap<>();
+    private Map<String, Map<String, Set<String>>> cpIndex = new HashMap<>();
+    private Model model = ModelFactory.createDefaultModel();
+
+    /**
+     * indexDir specifies the root directory of the indices
+     *
+     * @param indexDir
+     */
+    public void setIndexLocation(String indexDir) {
+        if (!indexDir.endsWith("/")) {
+            indexDir = indexDir.trim() + "/";
+        }
+        this.classIndexFilePath = indexDir + "cinv.indx";
+        this.propertyIndexFilePath = indexDir + "pinv.indx";
+        this.cpIndexFilePath = indexDir + "cpinv.indx";
+    }
+
+    /**
+     * createInvertedIndex queries an endpoint for its classes and properties through
+     * sparqlQuery to create the indices
+     *
+     * @param endpointUrl
+     * @param sparqlQuery
+     */
+    public void createInvertedIndex(String endpointUrl, String sparqlQuery) {
+        int counter = 0;
+        // Create a Query object
+        Query query = QueryFactory.create(sparqlQuery);
+
+        // Create a QueryExecution using the builder
+        try (QueryExecution qexec = QueryExecutionHTTP.create()
+                .endpoint(endpointUrl)
+                .query(query)
+                .build()) {
+            ResultSet results = qexec.execSelect();
+            while (results.hasNext()) {
+                QuerySolution soln = results.nextSolution();
+                String classUri = soln.get("class").toString();
+                String propertyUri = soln.get("property").toString();
+
+                if (this.stop_classes.contains(classUri))
+                    continue;
+                if (!classUri.isEmpty()) {
+                    classIndex.computeIfAbsent(classUri, k -> new HashSet<>()).add(endpointUrl);
+                }
+
+                if (this.stop_properties.contains(propertyUri))
+                    continue;
+                if (!propertyUri.isEmpty()) {
+                    propertyIndex.computeIfAbsent(propertyUri, k -> new HashSet<>()).add(endpointUrl);
+                }
+
+                if (!classUri.isEmpty() && !propertyUri.isEmpty()) {
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+
+                if (++counter % 1000000 == 0)
+                    System.out.print(counter + " ");
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public Map<String, Set<String>> getClassIndex() {
+        return classIndex;
+    }
+
+    public Map<String, Set<String>> getPropertyIndex() {
+        return propertyIndex;
+    }
+
+    public Map<String, Map<String, Set<String>>> getCpIndex() {
+        return cpIndex;
+    }
+
+    /**
+     * it loads the stop-CPs: lists of common classes & properties to exclude
+     *
+     * @param stop_cps_file
+     */
+    public void loadStopCPs(String stop_cps_file) {
+        Map<String, HashSet<String>> stop_cps = new HashMap<>();
+        try (Reader reader = new FileReader(stop_cps_file)) {
+            stop_cps = new Gson().fromJson(reader, new TypeToken<Map<String, HashSet<String>>>() {
+            }.getType());
+            this.stop_classes = stop_cps.get("classes");
+            this.stop_properties = stop_cps.get("properties");
+            System.out.println("Stop-classes & Stop-properties loaded successfully.");
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + stop_cps_file + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it updates the indices for a new set of triples inserted into an endpoint
+     *
+     * @param newTriples
+     * @param endpointUrl
+     */
+    public void addTriplesAndUpdateIndex(List<Triple> newTriples, String endpointUrl) {
+        for (Triple triple : newTriples) {
+            model.add(model.asStatement(triple));
+
+            if (triple.getPredicate().equals(RDF.type.asNode())) {
+                String classUri = triple.getObject().getURI();
+                if (this.stop_classes.contains(classUri))
+                    continue;
+                classIndex.computeIfAbsent(classUri, k -> new HashSet<>()).add(endpointUrl);
+            } else {
+                String propertyUri = triple.getPredicate().getURI();
+                if (this.stop_properties.contains(propertyUri))
+                    continue;
+                propertyIndex.computeIfAbsent(propertyUri, k -> new HashSet<>()).add(endpointUrl);
+            }
+        }
+
+        for (Triple triple : newTriples) {
+            if (propertyIndex.containsKey(triple.getPredicate().getURI())) {
+                String propertyUri = triple.getPredicate().getURI();
+                if (classIndex.containsKey(triple.getSubject().getURI())) {
+                    String classUri = triple.getSubject().getURI();
+                    if (this.stop_classes.contains(classUri))
+                        continue;
+
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+                if (classIndex.containsKey(triple.getObject().getURI())) {
+                    String classUri = triple.getObject().getURI();
+                    if (this.stop_classes.contains(classUri))
+                        continue;
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+            }
+        }
+
+        // tripleCount += newTriples.size();
+        System.out.println("Added " + newTriples.size() + " triples and updated the index.");
+    }
+
+    /**
+     * it loads the indices from the initialised directory
+     */
+    public void loadIndices() {
+        loadClassIndex();
+        loadPropertyIndex();
+        loadCpIndex();
+    }
+
+    /**
+     * it loads the class-to-endpoint index from the initialised file
+     */
+    private void loadClassIndex() {
+        try (Reader reader = new FileReader(classIndexFilePath)) {
+            classIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Set<String>>>() {
+            }.getType());
+            System.out.println("Class Index loaded from " + classIndexFilePath);
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + classIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it loads the property-to-endpoint index from the initialised file
+     */
+    private void loadPropertyIndex() {
+        try (Reader reader = new FileReader(propertyIndexFilePath)) {
+            propertyIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Set<String>>>() {
+            }.getType());
+            System.out.println("Property Index loaded from " + propertyIndexFilePath);
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + propertyIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it loads the class-to-property-to-endpoint index from the initialised file
+     */
+    private void loadCpIndex() {
+        try (Reader reader = new FileReader(cpIndexFilePath)) {
+            cpIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Map<String, Set<String>>>>() {
+            }.getType());
+            System.out.println("Class-Property multilevel inverted index loaded successfully.");
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + cpIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it saves the indices into the initialised directory
+     */
+    public void saveIndices() {
+        saveClassIndex();
+        savePropertyIndex();
+        saveCpIndex();
+    }
+
+    /**
+     * it saves the class-to-endpoint index into the initialised file
+     */
+    private void saveClassIndex() {
+        try (Writer writer = new FileWriter(classIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(classIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it saves the property-to-endpoint index into the initialised file
+     */
+    private void savePropertyIndex() {
+        try (Writer writer = new FileWriter(propertyIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(propertyIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * it saves the class-to-property-to-endpoint index into the initialised file
+     */
+    private void saveCpIndex() {
+        try (Writer writer = new FileWriter(cpIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(cpIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    // private String getSubstringFromLast(String string) {
+    //     int lastSlash = string.lastIndexOf('/');
+    //     int lastHash = string.lastIndexOf('#');
+    //     return string.substring(Math.max(lastSlash, lastHash) + 1);
+    // }
+
+    public static void main(String[] args) {
+
+        String sparqlQuery = """
+                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                PREFIX owl: <http://www.w3.org/2002/07/owl#>
+                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+                SELECT ?class ?property
+                WHERE {
+                    {
+                        ?subject ?property ?object .
+                        ?subject a ?class .
+                        FILTER (
+                            isIRI(?class) && ?class != owl:Ontology && ?class != owl:Class && ?class != owl:NamedIndividual &&
+                            !isBlank(?property) && isIRI(?property) && ?property != rdf:type && ?property != rdfs:label && ?property != rdfs:comment
+                        )
+                    }
+                    UNION
+                    {
+                        ?subject ?property ?object .
+                        ?object a ?class .
+                        FILTER (
+                            isIRI(?class) && ?class != owl:Ontology && ?class != owl:Class && ?class != owl:NamedIndividual &&
+                            !isBlank(?property) && isIRI(?property) && ?property != rdf:type && ?property != rdfs:label && ?property != rdfs:comment
+                        )
+                    }
+                }
+                """;
+        String blazegraphBaseUrl = "http://localhost:8080/blazegraph";
+        String[] namespaces = { "namespace_uken", "namespace_kin", "namespace_compchem" };
+        String indexLocation = "C:/Users/printer_admin/Downloads/KGs/tests";
+
+        BuildInvertedIndex kgs = new BuildInvertedIndex();
+        kgs.setIndexLocation(indexLocation);
+        kgs.loadStopCPs("src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json");
+
+        for (String namespace : namespaces) {
+            String endpointUrl = blazegraphBaseUrl + "/namespace/" + namespace + "/sparql";
+            System.out.println("Start processing endpoint: " + endpointUrl);
+            kgs.createInvertedIndex(endpointUrl, sparqlQuery);
+        }
+
+        kgs.saveIndices();
+        System.out.println("Completed and index-files are saved in " + indexLocation + ".");
+    }
+}
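As a usage sketch (not part of the patch; the endpoint URL and triple below are illustrative placeholders), keeping the indices in sync when new data is inserted amounts to handing the inserted triples to `addTriplesAndUpdateIndex` on the class above and re-saving:

```java
// Sketch: incrementally updating the inverted indices after an INSERT.
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import java.util.List;

public class IndexUpdateExample {
    public static void main(String[] args) {
        BuildInvertedIndex index = new BuildInvertedIndex();
        index.setIndexLocation("C:/Users/printer_admin/Downloads/KGs/tests");
        index.loadStopCPs("src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json");
        index.loadIndices(); // start from the previously saved index files

        // A hypothetical rdf:type triple that just arrived at an endpoint.
        Triple triple = Triple.create(
                NodeFactory.createURI("http://example.org/kb/element1"),
                NodeFactory.createURI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                NodeFactory.createURI("http://example.org/ontology#Element"));

        index.addTriplesAndUpdateIndex(List.of(triple),
                "http://localhost:8080/blazegraph/namespace/namespace_all/sparql");
        index.saveIndices(); // persist the refreshed maps
    }
}
```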
+ FILTER ( + isIRI(?class) && + ?class != owl:Ontology && + ?class != owl:Class && + ?class != owl:NamedIndividual && + !isBlank(?property) && + isIRI(?property) && + ?property != rdf:type && + ?property != rdfs:label && + ?property != rdfs:comment + ) + } + } + """; + String blazegraphBaseUrl = "http://localhost:8080/blazegraph"; + String[] namespaces = { "namespace_uken", "namespace_kin", "namespace_compchem" }; + String indexLocation = "C:/Users/printer_admin/Downloads/KGs/tests"; + + BuildInvertedIndex kgs = new BuildInvertedIndex(); + kgs.setIndexLocation(indexLocation); + kgs.loadStopCPs("src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json"); + + for (String namespace : namespaces) { + String endpointUrl = blazegraphBaseUrl + "/namespace/" + namespace + "/sparql"; + System.out.println("Start processing endpoint: " + endpointUrl); + kgs.createInvertedIndex(endpointUrl, sparqlQuery); + } + + kgs.saveIndices(); + System.out.println("Completed and index-files are saved in " + indexLocation + "."); + } +} diff --git a/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/FedqIndex.java b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/FedqIndex.java new file mode 100644 index 00000000000..c595b3e0baa --- /dev/null +++ b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/FedqIndex.java @@ -0,0 +1,520 @@ +package uk.ac.cam.cares.jps.base.query.fedq; + +import com.google.gson.*; +import com.google.gson.reflect.TypeToken; + +import org.apache.jena.graph.Node; +import org.apache.jena.graph.Triple; +import org.apache.jena.query.Query; +import org.apache.jena.query.QueryExecution; +import org.apache.jena.query.QueryFactory; +import org.apache.jena.query.QuerySolution; +import org.apache.jena.rdf.model.*; +import org.apache.jena.riot.system.StreamRDF; +import org.apache.jena.riot.system.StreamRDFBase; +import org.apache.jena.riot.Lang; +import org.apache.jena.riot.RDFParser; + +import org.apache.jena.sparql.algebra.Op; +import org.apache.jena.sparql.algebra.OpVisitorBase; +import org.apache.jena.sparql.algebra.OpWalker; +import org.apache.jena.sparql.algebra.op.OpBGP; +import org.apache.jena.sparql.core.BasicPattern; +import org.apache.jena.sparql.exec.http.QueryExecutionHTTP; +import org.apache.jena.vocabulary.RDF; +import org.apache.jena.query.ResultSet; + +import java.io.*; +import java.lang.reflect.Type; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * The class wraps some functionality to- + * 1. create inverted index on a number of endpoints + * a) class to endpoint inverted index + * b) property to endpoint inverted index + * c) class-to property to endpoint inverted inedx + * 2. update inverted index on insertion of new set of triple into an endpoint + * 3. load inverted index from a file data into a memory map + * 4. save inverted index from memory map to storage file + */ + +public class FedqIndex { + private static final String CLASS_PROPERTY_EXTRACTION_QUERY = """ + PREFIX rdf: + PREFIX owl: + PREFIX rdfs: + SELECT ?class ?property + WHERE { + { + ?subject ?property ?object . + ?subject a ?class . + FILTER ( + isIRI(?class) && + ?class != owl:Ontology && + ?class != owl:Class && + ?class != owl:NamedIndividual && + !isBlank(?property) && + isIRI(?property) && + ?property != rdf:type && + ?property != rdfs:label && + ?property != rdfs:comment + ) + } + UNION + { + ?subject ?property ?object . + ?object a ?class . 
+
+    private String classIndexFilePath = "";
+    private String propertyIndexFilePath = "";
+    private String cpIndexFilePath = "";
+
+    // private int tripleCount = 0;
+    private HashSet<String> stop_classes = new HashSet<>();
+    private HashSet<String> stop_properties = new HashSet<>();
+    private Map<String, Set<String>> classIndex = new HashMap<>();
+    private Map<String, Set<String>> propertyIndex = new HashMap<>();
+    private Map<String, Map<String, Set<String>>> cpIndex = new HashMap<>();
+    private Model model = ModelFactory.createDefaultModel();
+
+    // loads
+    private Set<Node> classes = new HashSet<>();
+    private Set<Node> properties = new HashSet<>();
+
+    public FedqIndex() {
+        this.loadStopCPs("src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json");
+        setIndexLocation("");
+    }
+
+    public FedqIndex(String indexDir) {
+        this.loadStopCPs("src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json");
+        this.setIndexLocation(indexDir);
+    }
+
+    /**
+     * Sets the root directory of the index files.
+     *
+     * @param indexDir
+     */
+    public void setIndexLocation(String indexDir) {
+        if (indexDir.length() > 0 && !indexDir.endsWith("/")) {
+            indexDir = indexDir.trim() + "/";
+        }
+        this.classIndexFilePath = indexDir + "cinv.indx";
+        this.propertyIndexFilePath = indexDir + "pinv.indx";
+        this.cpIndexFilePath = indexDir + "cpinv.indx";
+    }
+
+    /**
+     * Works on an endpoint: retrieves its classes and properties via the
+     * built-in extraction query and records them in the inverted indices.
+     *
+     * @param endpointUrl
+     */
+    public void createInvertedIndex(String endpointUrl) {
+        int counter = 0;
+        // Create a Query object
+        Query query = QueryFactory.create(CLASS_PROPERTY_EXTRACTION_QUERY);
+
+        // Create a QueryExecution using the builder
+        try (QueryExecution qexec = QueryExecutionHTTP.create()
+                .endpoint(endpointUrl)
+                .query(query)
+                .build()) {
+            ResultSet results = qexec.execSelect();
+            while (results.hasNext()) {
+                QuerySolution soln = results.nextSolution();
+                String classUri = soln.get("class").toString();
+                String propertyUri = soln.get("property").toString();
+
+                if (this.stop_classes.contains(classUri))
+                    continue;
+                if (!classUri.isEmpty()) {
+                    classIndex.computeIfAbsent(classUri, k -> new HashSet<>()).add(endpointUrl);
+                }
+
+                if (this.stop_properties.contains(propertyUri))
+                    continue;
+                if (!propertyUri.isEmpty()) {
+                    propertyIndex.computeIfAbsent(propertyUri, k -> new HashSet<>()).add(endpointUrl);
+                }
+
+                if (!classUri.isEmpty() && !propertyUri.isEmpty()) {
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+
+                if (++counter % 1000000 == 0)
+                    System.out.print(counter + " ");
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    public Map<String, Set<String>> getClassIndex() {
+        return classIndex;
+    }
+
+    public Map<String, Set<String>> getPropertyIndex() {
+        return propertyIndex;
+    }
+
+    public Map<String, Map<String, Set<String>>> getCpIndex() {
+        return cpIndex;
+    }
+
+    /**
+     * Loads the stop-CPs: the lists of common classes and properties that are
+     * excluded from indexing.
+     *
+     * @param stop_cps_file
+     */
+    public void loadStopCPs(String stop_cps_file) {
+        Map<String, HashSet<String>> stop_cps = new HashMap<>();
+        try (Reader reader = new FileReader(stop_cps_file)) {
+            stop_cps = new Gson().fromJson(reader, new TypeToken<Map<String, HashSet<String>>>() {
+            }.getType());
+            this.stop_classes = stop_cps.get("classes");
+            this.stop_properties = stop_cps.get("properties");
+            System.out.println("Stop-classes & Stop-properties loaded successfully.");
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + stop_cps_file + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Updates the indices when a new set of triples is inserted into an
+     * endpoint.
+     *
+     * @param newTriples
+     * @param endpointUrl
+     */
+    public void addTriplesAndUpdateIndex(List<Triple> newTriples, String endpointUrl) {
+        for (Triple triple : newTriples) {
+            // model.add(model.asStatement(triple));
+
+            if (triple.getPredicate().equals(RDF.type.asNode())) {
+                String classUri = triple.getObject().getURI();
+                if (this.stop_classes.contains(classUri))
+                    continue;
+                classIndex.computeIfAbsent(classUri, k -> new HashSet<>()).add(endpointUrl);
+            } else {
+                String propertyUri = triple.getPredicate().getURI();
+                if (this.stop_properties.contains(propertyUri))
+                    continue;
+                propertyIndex.computeIfAbsent(propertyUri, k -> new HashSet<>()).add(endpointUrl);
+            }
+        }
+
+        for (Triple triple : newTriples) {
+            if (propertyIndex.containsKey(triple.getPredicate().getURI())) {
+                String propertyUri = triple.getPredicate().getURI();
+                if (classIndex.containsKey(triple.getSubject().getURI())) {
+                    String classUri = triple.getSubject().getURI();
+                    if (this.stop_classes.contains(classUri))
+                        continue;
+
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+                if (classIndex.containsKey(triple.getObject().getURI())) {
+                    String classUri = triple.getObject().getURI();
+                    if (this.stop_classes.contains(classUri))
+                        continue;
+                    cpIndex.computeIfAbsent(classUri, k -> new HashMap<>())
+                            .computeIfAbsent(propertyUri, k -> new HashSet<>())
+                            .add(endpointUrl);
+                }
+            }
+        }
+
+        // tripleCount += newTriples.size();
+        System.out.println("Added " + newTriples.size() + " triples and updated the index.");
+    }
+
+    /**
+     * Extracts the triples from the body of a SPARQL INSERT DATA string by
+     * parsing the first brace-delimited block as Turtle.
+     *
+     * @param sparqlInsert
+     * @return the parsed triples
+     */
+    public ArrayList<Triple> extractTriples(String sparqlInsert) {
+        ArrayList<Triple> tripleList = new ArrayList<>();
+
+        // Step 1: Extract the data portion between `{` and `}` braces
+        Pattern pattern = Pattern.compile("\\{(.*?)}", Pattern.DOTALL);
+        Matcher matcher = pattern.matcher(sparqlInsert);
+        String dataContent = "";
+        if (matcher.find()) {
+            dataContent = matcher.group(1).trim();
+        }
+
+        // Step 2: Parse the extracted data content as N-Triples format
+        ByteArrayInputStream inputStream = new ByteArrayInputStream(dataContent.getBytes());
+
+        // Step 3: Use a custom StreamRDF to collect triples
+        StreamRDF tripleCollector = new StreamRDFBase() {
+            @Override
+            public void triple(Triple triple) {
+                tripleList.add(triple);
+            }
+        };
+
+        // Step 4: Parse the triples using RDFParser
+        RDFParser.create()
+                .source(inputStream)
+                .lang(Lang.TTL)
+                .parse(tripleCollector);
+
+        return tripleList;
+    }
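A small round trip through `extractTriples`, assuming a class in the same package as FedqIndex; the index directory and the example.org IRIs are hypothetical. Note the regex takes only the first `{...}` block, so this suits simple single-block INSERT DATA strings:

```java
import org.apache.jena.graph.Triple;

public class ExtractTriplesDemo {
    public static void main(String[] args) {
        // Illustrative directory; the constructor only warns if stopcps.json
        // is missing at its default relative path.
        FedqIndex fqix = new FedqIndex("C:/tmp/indices");
        String sparqlInsert = """
                INSERT DATA {
                    <http://example.org/a> <http://example.org/p> <http://example.org/b> .
                }
                """;
        for (Triple t : fqix.extractTriples(sparqlInsert)) {
            System.out.println(t); // prints the single parsed example.org triple
        }
    }
}
```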
+
+    /**
+     * Loads all three indices from the initialised directory.
+     */
+    public void loadIndices() {
+        loadClassIndex();
+        loadPropertyIndex();
+        loadCpIndex();
+    }
+
+    /**
+     * Loads the class-to-endpoint index from the initialised file.
+     */
+    private void loadClassIndex() {
+        try (Reader reader = new FileReader(classIndexFilePath)) {
+            classIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Set<String>>>() {
+            }.getType());
+            System.out.println("Class Index loaded from " + classIndexFilePath);
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + classIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Loads the property-to-endpoint index from the initialised file.
+     */
+    private void loadPropertyIndex() {
+        try (Reader reader = new FileReader(propertyIndexFilePath)) {
+            propertyIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Set<String>>>() {
+            }.getType());
+            System.out.println("Property Index loaded from " + propertyIndexFilePath);
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + propertyIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Loads the class-to-property-to-endpoint index from the initialised file.
+     */
+    private void loadCpIndex() {
+        try (Reader reader = new FileReader(cpIndexFilePath)) {
+            cpIndex = new Gson().fromJson(reader, new TypeToken<Map<String, Map<String, Set<String>>>>() {
+            }.getType());
+            System.out.println("Class-Property multilevel inverted index loaded successfully.");
+        } catch (FileNotFoundException e) {
+            System.err.println("File '" + cpIndexFilePath + "' not found.");
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Saves all three indices into the initialised directory.
+     */
+    public void saveIndices() {
+        saveClassIndex();
+        savePropertyIndex();
+        saveCpIndex();
+    }
+
+    /**
+     * Saves the class-to-endpoint index into the initialised file.
+     */
+    private void saveClassIndex() {
+        try (Writer writer = new FileWriter(classIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(classIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Saves the property-to-endpoint index into the initialised file.
+     */
+    private void savePropertyIndex() {
+        try (Writer writer = new FileWriter(propertyIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(propertyIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Saves the class-to-property-to-endpoint index into the initialised file.
+     */
+    private void saveCpIndex() {
+        try (Writer writer = new FileWriter(cpIndexFilePath)) {
+            new GsonBuilder().setPrettyPrinting().create().toJson(cpIndex, writer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Extracts the classes and properties referenced in the BGPs of the given
+     * query. As a heuristic, IRI subjects and IRI objects are treated as
+     * classes.
+     *
+     * @param sparqlQuery
+     */
+    public void extractClassesAndProperties(String sparqlQuery) {
+        Query query = QueryFactory.create(sparqlQuery);
+        Op queryObject = org.apache.jena.sparql.algebra.Algebra.compile(query);
+
+        OpWalker.walk(queryObject, new OpVisitorBase() {
+            @Override
+            public void visit(OpBGP opBGP) {
+                BasicPattern triples = opBGP.getPattern();
+                for (Triple triple : triples) {
+                    Node subject = triple.getSubject();
+                    Node predicate = triple.getPredicate();
+                    Node object = triple.getObject();
+                    if (subject.isURI())
+                        classes.add(subject);
+                    if (predicate.isURI())
+                        properties.add(predicate);
+                    if (object.isURI() && object.getURI().startsWith("http"))
+                        classes.add(object);
+                }
+            }
+        });
+    }
+
+    /**
+     * Loads the class and property indices from the given directory; the
+     * class-to-property-to-endpoint index is not reloaded here.
+     *
+     * @param indexDir
+     * @throws IOException
+     */
+    public void loadIndices(String indexDir) throws IOException {
+        if (indexDir.trim().endsWith("/")) {
+            this.classIndexFilePath = indexDir.trim() + "cinv.indx";
+            this.propertyIndexFilePath = indexDir.trim() + "pinv.indx";
+            this.cpIndexFilePath = indexDir.trim() + "cpinv.indx";
+        } else {
+            this.classIndexFilePath = indexDir.trim() + "/cinv.indx";
+            this.propertyIndexFilePath = indexDir.trim() + "/pinv.indx";
+            this.cpIndexFilePath = indexDir.trim() + "/cpinv.indx";
+        }
+
+        this.classIndex = loadIndexFromFile(this.classIndexFilePath);
+        this.propertyIndex = loadIndexFromFile(this.propertyIndexFilePath);
+    }
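Putting the pieces together, an incremental update could plausibly look like the sketch below; the directory, endpoint URL and IRIs are all placeholders, and the class is assumed to live in the same package as FedqIndex:

```java
import java.util.List;
import org.apache.jena.graph.Triple;

public class IncrementalUpdateDemo {
    public static void main(String[] args) {
        FedqIndex fqi = new FedqIndex("C:/tmp/indices");
        fqi.loadIndices(); // bring the previously saved maps into memory

        String endpointUrl = "http://localhost:8080/blazegraph/namespace/namespace_kin/sparql";
        String insert = """
                INSERT DATA {
                    <http://example.org/s> <http://example.org/p> <http://example.org/o> .
                }
                """;

        List<Triple> newTriples = fqi.extractTriples(insert);
        fqi.addTriplesAndUpdateIndex(newTriples, endpointUrl); // update in memory
        fqi.saveIndices();                                     // persist the updated maps
    }
}
```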
+
+    /**
+     * Loads a key-to-endpoint index from the given file.
+     *
+     * @param filePath
+     * @return the loaded index map
+     */
+    private Map<String, Set<String>> loadIndexFromFile(String filePath) throws IOException {
+        Map<String, Set<String>> index = new HashMap<>();
+        Gson gson = new Gson();
+        Type mapType = new TypeToken<Map<String, Set<String>>>() {
+        }.getType();
+        try (FileReader reader = new FileReader(filePath)) {
+            index = gson.fromJson(reader, mapType);
+        }
+        return index;
+    }
+
+    /**
+     * Finds the endpoints matching the extracted classes and properties.
+     *
+     * @return the union of the matched endpoint sets
+     */
+    public Set<String> getEndpoints() {
+        Set<String> endpoints = new HashSet<>();
+        for (Node classUriRef : classes) {
+            String classUri = classUriRef.getURI();
+            if (classIndex.containsKey(classUri)) {
+                endpoints.addAll(classIndex.get(classUri));
+            }
+        }
+        for (Node propertyUriRef : properties) {
+            String propertyUri = propertyUriRef.getURI();
+            if (propertyIndex.containsKey(propertyUri)) {
+                endpoints.addAll(propertyIndex.get(propertyUri));
+            }
+        }
+
+        return endpoints;
+    }
+
+    public static void main(String[] args) {
+
+        // String blazegraphBaseUrl = "http://localhost:8080/blazegraph";
+        // String[] namespaces = { "namespace_uken", "namespace_kin",
+        // "namespace_compchem" };
+
+        FedqIndex fqix = new FedqIndex("C:/Users/printer_admin/Downloads/KGs/tests");
+
+        String sparqlInsert = """
+                INSERT DATA {
+                     .
+                     .
+                }
+                """;
+
+        ArrayList<Triple> triples = fqix.extractTriples(sparqlInsert);
+        // addTriplesAndUpdateIndex(List<Triple> newTriples, String endpointUrl)
+        for (Triple triple : triples) {
+            System.out.println(triple.toString());
+        }
+
+        // for (String namespace : namespaces) {
+        // String endpointUrl = blazegraphBaseUrl + "/namespace/" + namespace +
+        // "/sparql";
+        // System.out.println("Start processing endpoint: " + endpointUrl);
+        // fqix.createInvertedIndex(endpointUrl);
+        // }
+
+        // fqix.saveIndices();
+        // System.out.println("Completed and index-files are saved.");
+    }
+}
process the SPARQL query against those endpoints using the FedX API
+ */
+
+public class ProcessQuery {
+
+    private String classIndexFilePath = "";
+    private String propertyIndexFilePath = "";
+    private String cpIndexFilePath = "";
+    private Set<Node> classes = new HashSet<>();
+    private Set<Node> properties = new HashSet<>();
+    private Map<String, Set<String>> classIndex = new HashMap<>();
+    private Map<String, Set<String>> propertyIndex = new HashMap<>();
+    private Map<String, Set<String>> cpIndex = new HashMap<>();
+
+    /**
+     * Extracts the classes and properties referenced in the user query. As a
+     * heuristic, IRI subjects and IRI objects of each triple pattern are
+     * treated as classes.
+     *
+     * @param sparqlQuery
+     */
+    public void extractClassesAndProperties(String sparqlQuery) {
+        Query query = QueryFactory.create(sparqlQuery);
+        Op queryObject = org.apache.jena.sparql.algebra.Algebra.compile(query);
+
+        OpWalker.walk(queryObject, new OpVisitorBase() {
+            @Override
+            public void visit(OpBGP opBGP) {
+                BasicPattern triples = opBGP.getPattern();
+                for (Triple triple : triples) {
+                    Node subject = triple.getSubject();
+                    Node predicate = triple.getPredicate();
+                    Node object = triple.getObject();
+                    if (subject.isURI())
+                        classes.add(subject);
+                    if (predicate.isURI())
+                        properties.add(predicate);
+                    if (object.isURI() && object.getURI().startsWith("http"))
+                        classes.add(object);
+                }
+            }
+        });
+    }
+
+    /**
+     * Loads the class and property indices from the given directory.
+     *
+     * @param indexDir
+     * @throws IOException
+     */
+    public void loadIndices(String indexDir) throws IOException {
+        if (indexDir.trim().endsWith("/")) {
+            this.classIndexFilePath = indexDir.trim() + "cinv.indx";
+            this.propertyIndexFilePath = indexDir.trim() + "pinv.indx";
+            this.cpIndexFilePath = indexDir.trim() + "cpinv.indx";
+        } else {
+            this.classIndexFilePath = indexDir.trim() + "/cinv.indx";
+            this.propertyIndexFilePath = indexDir.trim() + "/pinv.indx";
+            this.cpIndexFilePath = indexDir.trim() + "/cpinv.indx";
+        }
+
+        this.classIndex = loadIndexFromFile(this.classIndexFilePath);
+        this.propertyIndex = loadIndexFromFile(this.propertyIndexFilePath);
+        // this.cpIndex = loadIndexFromFile(this.cpIndexFilePath);
+        // System.out.printf("%d\n%d\n", this.classIndex.size(), this.propertyIndex.size());
+    }
+
+    /**
+     * Loads a key-to-endpoint index from the given file.
+     *
+     * @param filePath
+     * @return the loaded index map
+     */
+    private Map<String, Set<String>> loadIndexFromFile(String filePath) throws IOException {
+        Map<String, Set<String>> index = new HashMap<>();
+        Gson gson = new Gson();
+        Type mapType = new TypeToken<Map<String, Set<String>>>() {
+        }.getType();
+        try (FileReader reader = new FileReader(filePath)) {
+            index = gson.fromJson(reader, mapType);
+        }
+        return index;
+    }
+
+    /**
+     * Finds the endpoints matching the extracted classes and properties.
+     *
+     * @return the union of the matched endpoint sets
+     */
+    public Set<String> getEndpoints() {
+        Set<String> endpoints = new HashSet<>();
+        for (Node classUriRef : classes) {
+            String classUri = classUriRef.getURI();
+            if (classIndex.containsKey(classUri)) {
+                // System.out.println("Found class alignment: " + classUri);
+                endpoints.addAll(classIndex.get(classUri));
+            }
+            // else {
+            // System.out.println("Un-aligned class: " + classUri);
+            // }
+        }
+        for (Node propertyUriRef : properties) {
+            String propertyUri = propertyUriRef.getURI();
+            if (propertyIndex.containsKey(propertyUri)) {
+                // System.out.println("Found property alignment: " + propertyUri);
+                endpoints.addAll(propertyIndex.get(propertyUri));
+            }
+            // else {
+            // System.out.println("Un-aligned property: " + propertyUri);
+            // }
+        }
+        // System.out.println("The Final Endpoints: ");
+        // System.out.println(endpoints);
+
+        return endpoints;
+    }
+
+    /**
+     * Processes the SPARQL query against the endpoint set and retrieves the
+     * final result using the FedX API.
+     *
+     * @param endpoint_set
+     * @param sparqlQuery
+     * @return the combined query results
+     */
+    public Set<BindingSet> executeQuery(Set<String> endpoint_set, String sparqlQuery) {
+        int counter = 0;
+        List<Endpoint> endpoints = new ArrayList<>();
+        Set<BindingSet> result = new HashSet<>();
+
+        for (String element : endpoint_set) {
+            endpoints.add(EndpointFactory.loadSPARQLEndpoint("namespace_" + counter++, element));
+        }
+        FedXRepository repository = FedXFactory.createFederation(endpoints);
+
+        try (RepositoryConnection conn = repository.getConnection()) {
+
+            TupleQuery tq = conn.prepareTupleQuery(sparqlQuery);
+            try (TupleQueryResult tqRes = tq.evaluate()) {
+
+                while (tqRes.hasNext()) {
+                    BindingSet b = tqRes.next();
+                    result.add(b);
+                }
+            }
+        }
+
+        repository.shutDown();
+        return result;
+    }
+
+    public static void main(String[] args) throws IOException {
+        // String sparqlQuery = """
+        // PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        // PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+        // PREFIX ontokin:
+
+        // SELECT DISTINCT ?MechanismName
+        // WHERE {
+        // ?MechanismIRI rdf:type ontokin:ReactionMechanism .
+        // ?MechanismIRI rdfs:label ?MechanismName .
+        // }
+        // """;
+        String sparqlQuery = """
+                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+                PREFIX owl: <http://www.w3.org/2002/07/owl#>
+                PREFIX pt:
+                PREFIX OntoKin:
+                PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+
+                SELECT DISTINCT ?identifier ?atomicMass ?atomicMassUnits
+                WHERE {
+                    ?element1 rdf:type pt:Element .
+                    BIND(STRAFTER(STR(?element1), "#") AS ?identifier)
+                    ?element2 rdf:type OntoKin:Element .
+                    ?element2 rdfs:label ?identifier1 .
+                    ?element2 OntoKin:hasAtomicMass ?atomicMass .
+                    ?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits .
+                    FILTER(?identifier = ?identifier1)
+                }
+                """;
+
+        ProcessQuery ana = new ProcessQuery();
+        String indexLocation = "C:/Users/printer_admin/Downloads/KGs/tests/";
+        ana.loadIndices(indexLocation);
+
+        long total_duration = 0;
+        int iteration = 100;
+        for (int i = 0; i < iteration; i++) {
+            long startTime = System.nanoTime();
+
+            Set<String> eps = new HashSet<>();
+
+            // run against extracted endpoints
+            // ana.extractClassesAndProperties(sparqlQuery);
+            // eps = ana.getEndpoints();
+
+            // run against manual endpoints
+            eps.add("http://localhost:8080/blazegraph/namespace/namespace_all/sparql");
+            eps.add("http://localhost:8080/blazegraph/namespace/namespace_uken/sparql");
+            eps.add("http://localhost:8080/blazegraph/namespace/namespace_kin/sparql");
+            eps.add("http://localhost:8080/blazegraph/namespace/namespace_compchem/sparql");
+            eps.add("http://localhost:8080/blazegraph/namespace/namespace_species/sparql");
+
+            Set<BindingSet> result = ana.executeQuery(eps, sparqlQuery);
+            long endTime = System.nanoTime();
+            long duration = (endTime - startTime) / 1000000;
+            if (i == 0) {
+                System.out.println("First execution time: " + duration + " milliseconds.");
+                for (BindingSet element : result) {
+                    System.out.println(element);
+                }
+            }
+            total_duration += duration;
+
+        }
+
+        System.out.println("Average processing time: " + total_duration / iteration + " milliseconds");
+
+    }
+}
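For orientation, the intended end-to-end flow of ProcessQuery appears to be: extract the IRIs from the query, look them up in the indices, then federate only over the matching endpoints. A sketch, assuming a class in the same package, with placeholder paths and a placeholder query:

```java
import org.eclipse.rdf4j.query.BindingSet;

import java.io.IOException;
import java.util.Set;

public class FederatedQueryDemo {
    public static void main(String[] args) throws IOException {
        ProcessQuery pq = new ProcessQuery();
        pq.loadIndices("C:/tmp/indices"); // directory holding cinv.indx and pinv.indx

        // Placeholder query: use classes/properties that occur in the indexed endpoints.
        String query = """
                PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
                SELECT ?s WHERE { ?s rdf:type <http://example.org/SomeClass> . } LIMIT 10
                """;

        pq.extractClassesAndProperties(query);     // collect IRIs from the query's BGPs
        Set<String> endpoints = pq.getEndpoints(); // endpoints whose indices mention them
        if (!endpoints.isEmpty()) {
            Set<BindingSet> rows = pq.executeQuery(endpoints, query);
            rows.forEach(System.out::println);
        }
    }
}
```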
diff --git a/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json
new file mode 100644
index 00000000000..7786b3ca3d2
--- /dev/null
+++ b/JPS_BASE_LIB/src/main/java/uk/ac/cam/cares/jps/base/query/fedq/stopcps.json
@@ -0,0 +1,12 @@
+{
+    "classes":[
+        "http://www.w3.org/2002/07/owl#Ontology",
+        "http://www.w3.org/2002/07/owl#Class",
+        "http://www.w3.org/2002/07/owl#NamedIndividual"
+    ],
+    "properties":[
+        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
+        "http://www.w3.org/2000/01/rdf-schema#label",
+        "http://www.w3.org/2000/01/rdf-schema#comment"
+    ]
+}
\ No newline at end of file
diff --git 
a/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/query/RemoteStoreClientTest.java b/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/query/RemoteStoreClientTest.java index b0debba9eed..24268e4f42b 100644 --- a/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/query/RemoteStoreClientTest.java +++ b/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/query/RemoteStoreClientTest.java @@ -27,10 +27,14 @@ import org.mockito.Mockito; import uk.ac.cam.cares.jps.base.exception.JPSRuntimeException; +import uk.ac.cam.cares.jps.base.query.fedq.BuildInvertedIndex; +import uk.ac.cam.cares.jps.base.query.fedq.FedqIndex; /** - * This class covers both unit tests and regression tests on RemoteStoreClient,
- * which is designed to perform query and update operations on virtually any
+ * This class covers both unit tests and regression tests on RemoteStoreClient,
+ *
+ * which is designed to perform query and update operations on virtually any
+ *
* SPARQL Endpoints. * * @author Feroz Farazi (msff2@cam.ac.uk) @@ -42,49 +46,56 @@ public class RemoteStoreClientTest { String updateEndpoint = "http://localhost:8080/blazegraph/namespace/ontokin/sparql"; String userName = "user"; String password = "password"; - + /** - * Verifies if the StoreClient constructor that is designed to
- * set the query endpoint (URL) assigns the value to the corresponding
- * member variable.
+ * Verifies if the StoreClient constructor that is designed to
+ *
+ * set the query endpoint (URL) assigns the value to the corresponding
+ *
+ * member variable. * * @throws SQLException */ @Test - public void queryEndpointSetupTest() throws SQLException{ + public void queryEndpointSetupTest() throws SQLException { RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint); assertNotNull(kbClient.getQueryEndpoint()); assertEquals(queryEndpoint, kbClient.getQueryEndpoint()); } - + /** - * Verifies if the StoreClient constructor that is designed to
- * set both the query and update endpoints (URLs) assigns the values to
- * the corresponding member variables.
+ * Verifies if the StoreClient constructor that is designed to
+ *
+ * set both the query and update endpoints (URLs) assigns the values to
+ *
+ * the corresponding member variables. * * @throws SQLException */ @Test - public void queryAndUpdateEndpointsSetupTest() throws SQLException{ + public void queryAndUpdateEndpointsSetupTest() throws SQLException { RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint, updateEndpoint); assertNotNull(kbClient.getQueryEndpoint()); assertNotNull(kbClient.getUpdateEndpoint()); assertEquals(updateEndpoint, kbClient.getQueryEndpoint()); assertEquals(queryEndpoint, kbClient.getUpdateEndpoint()); } - + /** - * Checks if the StoreClient constructor that is designed to
- * set the query and update endpoints (URLs) and query assigns the values
- * to the corresponding member variables.
+ * Checks if the StoreClient constructor that is designed to
+ *
+ * set the query and update endpoints (URLs) and query assigns the values
+ *
+ * to the corresponding member variables. * * @throws SQLException */ @Test - public void endpointsAndQuerySetupTest() throws SQLException{ + public void endpointsAndQuerySetupTest() throws SQLException { userName = "user"; password = "password"; - RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint, updateEndpoint, formMechanismCountQuery(), userName, password); + RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint, updateEndpoint, formMechanismCountQuery(), + userName, password); assertNotNull(kbClient.getQueryEndpoint()); assertNotNull(kbClient.getUpdateEndpoint()); assertNotNull(kbClient.getQuery()); @@ -110,7 +121,7 @@ public void endpointsAndQuerySetupTest() throws SQLException{ assertEquals(updateEndpoint, kbClient.getUpdateEndpoint()); assertEquals(formInsertQuery(), kbClient.getQuery()); } - + @Test public void testIsUpdateEndpointBlazegraphBackended() { queryEndpoint = "/test/Query/Endpoint"; @@ -133,14 +144,16 @@ public void testIsUpdateEndpointBlazegraphBackended() { kbClient = new RemoteStoreClient(queryEndpoint, updateEndpoint); assertTrue(!kbClient.isUpdateEndpointBlazegraphBackended()); } + /** - * Checks if the connection URL established for the update endpoint (URL)
+ * Checks if the connection URL established for the update endpoint (URL)
+ *
* is the expected one. * * @throws SQLException */ @Test - public void connectionURLForUpdateEndpointTest() throws SQLException{ + public void connectionURLForUpdateEndpointTest() throws SQLException { String updateEndpoint = "http://localhost:8080/test"; RemoteStoreClient kbClient = new RemoteStoreClient(); kbClient.setUpdateEndpoint(updateEndpoint); @@ -148,40 +161,42 @@ public void connectionURLForUpdateEndpointTest() throws SQLException{ assertEquals("jdbc:jena:remote:update=".concat(updateEndpoint), kbClient.getConnectionUrl()); } - - /** - * Checks if the connection URL established for the query endpoint (URL)
- * is the expected one.
+ * Checks if the connection URL established for the query endpoint (URL)
+ *
+ * is the expected one. * * @throws SQLException */ @Test - public void connectionURLForQueryEndpointTest() throws SQLException{ + public void connectionURLForQueryEndpointTest() throws SQLException { queryEndpoint = "http://localhost:8080/test"; RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint); assertNotNull(kbClient.getConnectionUrl()); assertEquals("jdbc:jena:remote:query=".concat(queryEndpoint), kbClient.getConnectionUrl()); } - + /** - * Checks if the connection URL established for the query and update
- * endpoints (URLs) is the expected one.
+ * Checks if the connection URL established for the query and update
+ *
+ * endpoints (URLs) is the expected one. * * @throws SQLException */ @Test - public void connectionURLForQueryAndInsertEndpointsTest() throws SQLException{ + public void connectionURLForQueryAndInsertEndpointsTest() throws SQLException { queryEndpoint = "http://localhost:8080/test"; updateEndpoint = "http://localhost:8080/test"; RemoteStoreClient kbClient = new RemoteStoreClient(queryEndpoint, updateEndpoint); assertNotNull(kbClient.getConnectionUrl()); - assertEquals("jdbc:jena:remote:query=".concat(queryEndpoint).concat("&update=").concat(updateEndpoint), kbClient.getConnectionUrl()); + assertEquals("jdbc:jena:remote:query=".concat(queryEndpoint).concat("&update=").concat(updateEndpoint), + kbClient.getConnectionUrl()); } - + /** - * Checks if the connection URL established for the query endpoint and
- * update endpoint for deletion is the expected one.
+ * Checks if the connection URL established for the query endpoint and
+ *
+ * update endpoint for deletion is the expected one. * * @throws SQLException */ @@ -194,15 +209,15 @@ public void connectionURLForQueryAndDeleteEndpointsTest() throws SQLException { assertEquals("jdbc:jena:remote:query=".concat(queryEndpoint).concat("&update=").concat(updateEndpoint), kbClient.getConnectionUrl()); } - + /** * Verifies the validity of both the query URL and update URL.
- * For example, standard protocols for URL, i.e. http and https are supported. + * For example, standard protocols for URL, i.e. http and https are supported. * * @throws SQLException */ @Test - public void connectionHttpURLTest() throws SQLException{ + public void connectionHttpURLTest() throws SQLException { // Tests the query endpoint queryEndpoint = "http://localhost:8080/blazegraph/namespace/ontokin/sparql"; RemoteStoreClient rKBClient = new RemoteStoreClient(queryEndpoint); @@ -213,7 +228,7 @@ public void connectionHttpURLTest() throws SQLException{ queryEndpoint = "httpss://localhost:8080/blazegraph/namespace/ontokin/sparql"; rKBClient = new RemoteStoreClient(queryEndpoint); assertFalse(rKBClient.isConnectionQueryUrlValid(rKBClient.getConnectionUrl())); - // Tests the update endpoint with the update URL only + // Tests the update endpoint with the update URL only updateEndpoint = "http://localhost:8080/blazegraph/namespace/ontokin/sparql"; rKBClient = new RemoteStoreClient(); rKBClient.setUpdateEndpoint(updateEndpoint); @@ -240,15 +255,15 @@ public void connectionHttpURLTest() throws SQLException{ rKBClient = new RemoteStoreClient(queryEndpoint, updateEndpoint); assertFalse(rKBClient.isConnectionUpdateUrlValid(rKBClient.getConnectionUrl())); } - + /** * Verifies the validity of connection URL consisting of a query URL,
- * user name and password. + * user name and password. * * @throws SQLException */ @Test - public void connectionHttpUrlWithAuthTest() throws SQLException{ + public void connectionHttpUrlWithAuthTest() throws SQLException { userName = "user"; password = "password"; queryEndpoint = "http://localhost:8080/blazegraph/namespace/ontokin/sparql"; @@ -256,18 +271,24 @@ public void connectionHttpUrlWithAuthTest() throws SQLException{ remoteKBClient.setQueryEndpoint(queryEndpoint); remoteKBClient.setUser(userName); remoteKBClient.setPassword(password); - assertEquals("jdbc:jena:remote:query=".concat(queryEndpoint).concat("&user=").concat(userName).concat("&password=").concat(password), + assertEquals( + "jdbc:jena:remote:query=".concat(queryEndpoint).concat("&user=").concat(userName).concat("&password=") + .concat(password), remoteKBClient.getConnectionUrl()); } - + /** - * Tests if the HTTP request to run a federated SPARQL query returns the expected
- * result. It also verifies if the mock service created for this test
+ * Tests if the HTTP request to run a federated SPARQL query returns the
+ * expected
+ *
+ * result. It also verifies if the mock service created for this test
+ *
* executes the correct method. - * @throws Exception + * + * @throws Exception */ @Test - public void performMechanismCountQueryTest() throws Exception{ + public void performMechanismCountQueryTest() throws Exception { RemoteStoreClient kbClient = mock(RemoteStoreClient.class); JSONArray jsonArray = new JSONArray(); JSONObject jsonObject = new JSONObject(); @@ -286,14 +307,16 @@ public void performMechanismCountQueryTest() throws Exception{ } /** - * Tests if the HTTP request to run a SPARQL query returns the expected
- * result. It also verifies if the mock service created for this test
+ * Tests if the HTTP request to run a SPARQL query returns the
+ * expected
+ *
+ * result. It also verifies if the mock service created for this test
+ *
* executes the correct method. * * @throws SQLException */ @Test - public void performFederatedQueryTest() throws SQLException{ + public void performFederatedQueryTest() throws SQLException { RemoteStoreClient kbClient = mock(RemoteStoreClient.class); JSONArray jsonArray = new JSONArray(); JSONObject jsonObject = new JSONObject(); @@ -306,59 +329,62 @@ public void performFederatedQueryTest() throws SQLException{ verify(kbClient).execute(formMechanismCountQuery()); } - /** * Test insert */ @Test public void testInsert() { - - String content = - "\r\n"+ - " \r\n"+ - "\r\n"; - + + String content = "\r\n" + + " \r\n" + + + "\r\n"; + RemoteStoreClient kbClient = Mockito.spy(RemoteStoreClient.class); Mockito.doReturn(1).when(kbClient).executeUpdate(any(UpdateRequest.class)); - + kbClient.insert(null, content, null); - + Mockito.verify(kbClient).insert(null, content, null); Mockito.verify(kbClient).executeUpdate(any(UpdateRequest.class)); } - + /** * Test get method */ @Test public void testGet() { - - String actual = - ""+System.getProperty("line.separator")+ - " "+System.getProperty("line.separator")+ - ""+System.getProperty("line.separator"); - - //mock result + + String actual = "" + + System.getProperty("line.separator") + + " " + + System.getProperty("line.separator") + + "" + System.getProperty("line.separator"); + + // mock result Model model = ModelFactory.createDefaultModel(); - Statement s = ResourceFactory.createStatement(ResourceFactory.createResource("http://www.theworldavatar.com/kb/sgp/singapore/wastenetwork/FoodCourt-001.owl#FoodCourt-001"), - ResourceFactory.createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), ResourceFactory.createResource("http://www.theworldavatar.com/ontology/ontowaste/OntoWaste.owl#FoodCourt")); + Statement s = ResourceFactory.createStatement( + ResourceFactory.createResource( + "http://www.theworldavatar.com/kb/sgp/singapore/wastenetwork/FoodCourt-001.owl#FoodCourt-001"), + ResourceFactory.createProperty("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), ResourceFactory + .createResource("http://www.theworldavatar.com/ontology/ontowaste/OntoWaste.owl#FoodCourt")); model.add(s); - + RemoteStoreClient kbClient = Mockito.spy(RemoteStoreClient.class); Mockito.doReturn(model).when(kbClient).executeConstruct(any(Query.class)); - + String resourceUrl = null; String accept = null; - + String result = kbClient.get(resourceUrl, accept); - + verify(kbClient).get(resourceUrl, accept); verify(kbClient).executeConstruct(any(Query.class)); - + assertEquals(result, actual); } @@ -384,15 +410,15 @@ public void testExecuteQueryAndUpdateException() { * * @return */ - private static String formMechanismCountQuery(){ + private static String formMechanismCountQuery() { String query = "PREFIX ontokin: \n"; - query = query.concat("PREFIX rdf: \n"); - query = query.concat("SELECT ?x \n"); - query = query.concat("WHERE\n"); - query = query.concat("{\n"); - query = query.concat("?x rdf:type ontokin:ReactionMechanism .\n"); - query = query.concat("}\n"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat("SELECT ?x \n"); + query = query.concat("WHERE\n"); + query = query.concat("{\n"); + query = query.concat("?x rdf:type ontokin:ReactionMechanism .\n"); + query = query.concat("}\n"); + return query; } /** @@ -400,65 +426,69 @@ private static String formMechanismCountQuery(){ * * @return */ - private static String formMechanismIRIsQuery(){ + private static String formMechanismIRIsQuery() { String query = "PREFIX ontokin: \n"; - query = 
query.concat("PREFIX rdf: \n"); - query = query.concat("SELECT ?x \n"); - query = query.concat("WHERE\n"); - query = query.concat("{\n"); - query = query.concat("?x rdf:type ontokin:ReactionMechanism .\n"); - query = query.concat("} LIMIT 10\n"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat("SELECT ?x \n"); + query = query.concat("WHERE\n"); + query = query.concat("{\n"); + query = query.concat("?x rdf:type ontokin:ReactionMechanism .\n"); + query = query.concat("} LIMIT 10\n"); + return query; } - /** * A SPARQL query to retrieve the IRIs of all mechanisms in a repository. * * @return */ - private static String formAnyTriplesQuery(){ + private static String formAnyTriplesQuery() { String query = "PREFIX ontokin: \n"; - query = query.concat("PREFIX rdf: \n"); - query = query.concat("SELECT ?x ?y ?z \n"); - query = query.concat("WHERE\n"); - query = query.concat("{\n"); - query = query.concat("?x ?y ?z .\n"); - query = query.concat("} LIMIT 10\n"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat("SELECT ?x ?y ?z \n"); + query = query.concat("WHERE\n"); + query = query.concat("{\n"); + query = query.concat("?x ?y ?z .\n"); + query = query.concat("} LIMIT 10\n"); + return query; } - - private static String formInsertQuery(){ + + private static String formInsertQuery() { String query = "PREFIX ontokin: \n"; - query = query.concat("PREFIX rdf: \n"); - query = query.concat("INSERT DATA { ontokin:hasTemperatureExponent \"-0.7\" }"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat( + "INSERT DATA { ontokin:hasTemperatureExponent \"-0.7\" }"); + return query; } - - private static String formTempExponentQuery(){ + + private static String formTempExponentQuery() { String query = "PREFIX ontokin: \n"; - query = query.concat("PREFIX rdf: \n"); - query = query.concat("SELECT ?tempExponent\n"); - query = query.concat("WHERE\n"); - query = query.concat("{\n"); - query = query.concat(" ontokin:hasTemperatureExponent ?tempExponent .\n"); - query = query.concat("}"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat("SELECT ?tempExponent\n"); + query = query.concat("WHERE\n"); + query = query.concat("{\n"); + query = query.concat( + " ontokin:hasTemperatureExponent ?tempExponent .\n"); + query = query.concat("}"); + return query; } - - private static String formDeleteQuery(){ + + private static String formDeleteQuery() { String query = "PREFIX ontokin: \n"; - query = query.concat("PREFIX rdf: \n"); - query = query.concat("DELETE DATA { ontokin:hasTemperatureExponent \"-0.7\" }"); - return query; + query = query.concat("PREFIX rdf: \n"); + query = query.concat( + "DELETE DATA { ontokin:hasTemperatureExponent \"-0.7\" }"); + return query; } - + /** - * A federated query developed to eqecute against the endpoints of OntoSpecies and OntoCompChem Knowledge Bases. + * A federated query developed to eqecute against the endpoints of OntoSpecies + * and OntoCompChem Knowledge Bases. + * * @return */ public static String formFederatedQuery() { - String query ="PREFIX OntoSpecies: " + String query = "PREFIX OntoSpecies: " + "PREFIX ontocompchem: " + "PREFIX gc: " + "SELECT DISTINCT ?species ?compchemspecies ?crid ?atomicBond ?geometry ?enthalpyOfFormationValue ?scfEnergyValue ?zeroEnergyValue " @@ -478,19 +508,19 @@ public static String formFederatedQuery() { + "?zeroEnergy gc:hasElectronicEnergy ?zeroElectronicEnergy . " + "?zeroElectronicEnergy gc:hasValue ?zeroEnergyValue . 
" + "}"; - + return query; } /** * A federated query developed to execute against the endpoints of
* OntoSpecies and OntoCompChem Knowledge Bases to retrieve partial
- * details of species. - * + * details of species. + * * @return */ public static String formFederatedQuery2() { - String query ="PREFIX OntoSpecies: " + String query = "PREFIX OntoSpecies: " + "PREFIX ontocompchem: " + "PREFIX gc: " + "SELECT * " @@ -506,25 +536,110 @@ public static String formFederatedQuery2() { + "?zeroEnergy gc:hasElectronicEnergy ?zeroElectronicEnergy . " + "?zeroElectronicEnergy gc:hasValue ?zeroEnergyValue . " + "}"; - + + return query; + } + + /** + * A federated query developed to execute against the endpoints of
+ * OntoSpecies and OntoCompChem Knowledge Bases to retrieve partial
+ * details of species. + * + * @return + */ + public static String formFederatedQuery3() { + String query = "PREFIX OntoSpecies: " + + "PREFIX ontocompchem: " + + "PREFIX gc: " + + "SELECT * " + + "WHERE { " + + "?x ?y ?z ." + + "} LIMIT 10"; + return query; } - public static void main(String[] args){ + public static String formFederatedQuery4() { + String sparqlQuery = """ + PREFIX rdf: + PREFIX owl: + PREFIX pt: + PREFIX OntoKin: + PREFIX rdfs: + + SELECT DISTINCT ?identifier ?atomicMass ?atomicMassUnits + WHERE { + ?element1 rdf:type pt:Element . + BIND(STRAFTER(STR(?element1), \"#\") AS ?identifier) + ?element2 rdf:type OntoKin:Element . + ?element2 rdfs:label ?identifier1 . + ?element2 OntoKin:hasAtomicMass ?atomicMass . + ?element2 OntoKin:hasAtomicMassUnits ?atomicMassUnits . + FILTER(?identifier = ?identifier1) + } + """; + + return sparqlQuery; + } + + public static void createIndex() { + + String endpoints[] = { "http://localhost:8080/blazegraph/namespace/namespace_kin/sparql", + "http://localhost:8080/blazegraph/namespace/namespace_compchem/sparql", + "http://localhost:8080/blazegraph/namespace/namespace_automotive/sparql", + "http://localhost:8080/blazegraph/namespace/namespace_species/sparql", + "http://localhost:8080/blazegraph/namespace/namespace_uken/sparql" + }; + + FedqIndex fqi = new FedqIndex("C:/Users/printer_admin/Downloads/KGs/20241030"); + + for (String endpointUrl : endpoints) { + System.out.println("Start processing endpoint: " + endpointUrl); + fqi.createInvertedIndex(endpointUrl); + } + + fqi.saveIndices(); + System.out.println("Completed and index-files are saved."); + } + + public static void extendIndex(String endpointUrl) { + + FedqIndex fqi = new FedqIndex("C:/Users/printer_admin/Downloads/KGs/20241030"); + fqi.loadIndices(); + + System.out.println("Start processing endpoint: " + endpointUrl); + fqi.createInvertedIndex(endpointUrl); + + fqi.saveIndices(); + System.out.println("Completed and index-files are saved."); + } + + public static void main(String[] args) { + // createIndex(); + + // extendIndex("http://localhost:8080/blazegraph/namespace/uken_1m/sparql"); + // extendIndex("http://localhost:8080/blazegraph/namespace/uken_10m/sparql"); + // extendIndex("http://localhost:8080/blazegraph/namespace/uken_200m/sparql"); + RemoteStoreClient kbClient = new RemoteStoreClient(); - List endpoints = new ArrayList<>(); - String queryEndpointOntoSpeciesKB = "http://www.theworldavatar.com/blazegraph/namespace/ontospecies/sparql"; - String queryEndpointOntoCompChemKB = "http://www.theworldavatar.com/blazegraph/namespace/ontocompchem/sparql"; - endpoints.add(queryEndpointOntoSpeciesKB); - endpoints.add(queryEndpointOntoCompChemKB); + FedqIndex fii = new FedqIndex("C:/Users/printer_admin/Downloads/KGs/20241030"); + fii.loadIndices(); + fii.extractClassesAndProperties(formFederatedQuery4()); + + List endpoints = new ArrayList<>(fii.getEndpoints()); + // String queryEndpointOntoSpeciesKB = + // "http://localhost:8080/blazegraph/namespace/namespace_all/sparql"; + // endpoints.add(queryEndpointOntoSpeciesKB); + try { - JSONArray result = kbClient.executeFederatedQuery(endpoints, formFederatedQuery2()); + JSONArray result = kbClient.executeFederatedQuery(endpoints, + formFederatedQuery4()); System.out.println(result.toString()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } - + } - + } diff --git a/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/timeseries/TimeSeriesRDBClientIntegrationTest.java 
b/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/timeseries/TimeSeriesRDBClientIntegrationTest.java index 8d08596a5b6..2137c9ec2d2 100644 --- a/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/timeseries/TimeSeriesRDBClientIntegrationTest.java +++ b/JPS_BASE_LIB/src/test/java/uk/ac/cam/cares/jps/base/timeseries/TimeSeriesRDBClientIntegrationTest.java @@ -12,6 +12,7 @@ import java.util.OptionalDouble; import org.jooq.*; +import org.jooq.Record; import org.junit.*; import org.jooq.impl.DSL; import static org.jooq.impl.DSL.selectFrom;
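The explicit `import org.jooq.Record;` alongside the existing wildcard import is presumably needed because the build now targets Java 17 (see the pom changes): `java.lang.Record` exists since Java 16, so a bare `Record` pulled in only via `org.jooq.*` becomes ambiguous, and a single-type import resolves it. A minimal sketch of the disambiguation:

```java
import org.jooq.*;
import org.jooq.Record; // the single-type import beats both on-demand candidates

class RecordDisambiguation {
    // With only "import org.jooq.*;", the bare name "Record" is ambiguous on
    // Java 16+ between org.jooq.Record and java.lang.Record.
    Record row; // resolves to org.jooq.Record thanks to the explicit import
}
```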