Update: Add in the response from cypherqagraph. Fix: Fix issue where the job title doesn't exist; allow switching the node label used for search

andyasdd1 · andyasdd1 · commit 3f5c4997f489 · 2024-12-01T17:20:52.000-05:00
diff --git a/be_repo/modules/job_recommendation_system.py b/be_repo/modules/job_recommendation_system.py
@@ -8,7 +8,6 @@
 import sys
 
 def main():
-    
 
     # Redirect standard output to a file
     sys.stdout = open('output.log', 'w')
@@ -34,6 +33,8 @@ def main():
         password=NEO4J_PASSWORD
     )
     
+    node_label = "JTitle"  # Adjust as needed; could be dynamic based on user input or other criteria
+
     # Initialize Controller Components
     resume_processor = ResumeProcessor()
     retrieval_engine = RetrievalEngine(resume_processor, neo4j_model)
@@ -51,7 +52,6 @@ def main():
         return
     
     # Perform Mixed Retrieval for 'JD' Node Label
-    node_label = "JD"  # Adjust as needed; could be dynamic based on user input or other criteria
     similar_docs, graph_results = retrieval_engine.perform_mixed_retrieval(resume_text, node_label=node_label)
     
     if not similar_docs and not graph_results:
diff --git a/be_repo/modules/recommendation_generator.py b/be_repo/modules/recommendation_generator.py
@@ -9,36 +9,42 @@ def merge_results(self, vector_docs, graph_results):
 
         # Process vector similarity results
         for doc in vector_docs:
-            comp = doc.metadata.get("comp", "")
-            resp = doc.metadata.get("resp", "")
-            job_title = f"{resp} at {comp}".strip()
-            if job_title:
-                combined_jobs[job_title] = combined_jobs.get(job_title, 0) + 1
+            # Exclude 'id' and get all other non-empty metadata properties
+            metadata = {k: v for k, v in doc.metadata.items() if k != 'id' and v}
+            # Create a description string from the non-empty properties
+            job_description = ', '.join(f"{k}: {v}" for k, v in metadata.items())
+            if job_description:
+                combined_jobs[job_description] = combined_jobs.get(job_description, 0) + 1
 
         # Process graph traversal results
-        # Access the context from intermediate steps
         intermediate_steps = graph_results.get('intermediate_steps', [])
         if len(intermediate_steps) > 1:
             context = intermediate_steps[1].get('context', [])
             for job in context:
-                job_title = job.get('job_title', '')
-                company = job.get('company', '')
-                if job_title and company:
-                    combined_job = f"{job_title} at {company}"
-                    combined_jobs[combined_job] = combined_jobs.get(combined_job, 0) + 1
+                # Exclude 'id' and get all other non-empty properties
+                job_data = {k: v for k, v in job.items() if k != 'id' and v}
+                # Create a description string
+                job_description = ', '.join(f"{k}: {v}" for k, v in job_data.items())
+                if job_description:
+                    combined_jobs[job_description] = combined_jobs.get(job_description, 0) + 1
+
+        # Include the 'result' from 'graph_results' directly
+        graph_result_text = graph_results.get('result', '').strip()
+        if graph_result_text:
+            combined_jobs[graph_result_text] = combined_jobs.get(graph_result_text, 0) + 1
 
         # Convert to sorted list based on combined score
         sorted_jobs = sorted(combined_jobs.items(), key=lambda item: item[1], reverse=True)
         return [job for job, score in sorted_jobs]
-    
+
     def generate_recommendations(self, vector_docs, graph_results):
         """
         Generate a ranked list of job recommendations by merging vector and graph results.
-        
+
         Parameters:
             vector_docs (List[Document]): Documents from vector similarity search.
             graph_results (dict): Results from graph traversal.
-        
+
         Returns:
             List[str]: Ranked list of unique job recommendations.
         """
diff --git a/be_repo/modules/retrieval_engine.py b/be_repo/modules/retrieval_engine.py
@@ -26,13 +26,10 @@ def __init__(self, resume_processor, neo4j_model):
         self.graph_chain = self.neo4j_model.get_graph_chain()
 
         # Define the PromptTemplate with 'context' as input variable
-        prompt = PromptTemplate(
-            template="""
-            You are an expert Cypher query writer for a Neo4j graph database.
+        template="""
+            You are an assistant that matches resumes to relevant job descriptions.
 
-            Given the user's question, generate an efficient Cypher query that:
-            - extract entities and relationships from the following resume. 
-            - Focus solely on the resume content.
+            Given the user's resume, find the most relevant job descriptions.
 
             **Entities to Extract:**
             - **Education (Edu):** Details about degrees, fields of study, institutions, start and end years, GPA.
@@ -42,30 +39,20 @@ def __init__(self, resume_processor, neo4j_model):
             - **Certifications (Cert):** Certification names, issuing organizations, expiration dates.
             - **Soft Skills (SSkill):** Non-technical skills like leadership, communication.
 
-            **Relationships to Identify:**
-            - **UTILIZES_SKILL:** A Work Experience (WE) node utilizes a Skill (Skill) node.
-            - **USES_TECH:** A Project (Proj) node uses a Skill (Skill) node as a technology.
-            - **REL_TO (Proj to Skill):** A Project (Proj) node is related to a Skill (Skill) node.
-            - **REL_TO (Skill to Skill):** A Skill (Skill) node is similar to another Skill (Skill) node.
-
             **Resume:**
             \"\"\"
             {context}
             \"\"\"
-            """,
-            input_variables=["input"]  
-        )
-
-         # Create a documents chain
-        self.combine_docs_chain = create_stuff_documents_chain(self.llm, prompt=prompt)
+            """
         
-        # Initialize Retrieval Chain
-        # Default node_label is 'JD'; can be adjusted as needed
-        self.retrieval_chain = create_retrieval_chain(
-            self.neo4j_model.get_retriever(node_label="JD"),
-            self.combine_docs_chain
+        self.prompt_template = PromptTemplate(
+            template=template,
+            input_variables=["input"]
         )
 
+        # Create a documents chain
+        self.combine_docs_chain = create_stuff_documents_chain(self.llm, self.prompt_template)
+
     def perform_mixed_retrieval(self, resume_text, node_label="JD"):
         """
         Perform mixed retrieval using vector similarity and graph traversal.
@@ -89,14 +76,21 @@ def perform_mixed_retrieval(self, resume_text, node_label="JD"):
         # Access the schema property correctly
         schema = self.neo4j_model.graph.get_schema
 
+        # Get the retriever for the given node label
+        retriever = self.neo4j_model.get_retriever(node_label=node_label)
+
+        # Create the retrieval chain with the retriever and the combine_docs_chain
+        retrieval_chain = create_retrieval_chain(
+            retriever,
+            self.combine_docs_chain
+        )
+
         # Perform vector similarity search
-        similar_docs_result = self.retrieval_chain.invoke({"input": resume_text})  # Corrected to 'context'
+        similar_docs_result = retrieval_chain.invoke({"input": resume_text})  # Corrected to 'context'
         similar_docs = similar_docs_result.get("output", [])
         print("similar_docs_result:", similar_docs_result)
         print("Keys in similar_docs_result:", similar_docs_result.keys())
         
-
-
         for doc in similar_docs:
             print("Document Metadata:", doc.metadata)