BUMETCS673 · Stanford997 · Nov 21, 2024 · Dec 1, 2024 · Dec 1, 2024 · Dec 1, 2024
diff --git a/.github/workflows/ci_e2e.yml b/.github/workflows/ci_e2e.yml
@@ -40,7 +40,7 @@ jobs:
           pnpm i
           pnpm lint
           pnpm build
-          pnpm run dev &
+          pnpm run preview &
 
       # Step 3: Start BE
       - name: Set up and start backend
@@ -52,7 +52,7 @@ jobs:
       # Step 4: Wair for BE and FE Services
       - name: Wait for services to be ready
         run: |
-          until curl -s http://localhost:3001; do
+          until curl -s http://localhost:8081; do
             echo "Waiting for frontend to be ready..."
             sleep 5
           done

diff --git a/be_repo/app.py b/be_repo/app.py
@@ -6,13 +6,17 @@
 from google.auth.transport import requests as google_requests
 from google.oauth2 import id_token
 
-from configs.database import get_resume_database, get_user_database
+from configs.database import get_resume_database, get_user_database, get_key_database
 from graphs.qa_graph import create_graph
 from modules.evaluator import evaluate_resume, evaluate_resume_with_jd
 from modules.job_recommendation_system import job_recommend
 from modules.langgraph_qa import get_answer_from_langgraph
 from modules.upload import upload_parse_resume
 
+keys_db = get_key_database()
+keys_collection = keys_db["keys"]
+GOOGLE_CLIENT_ID = keys_collection.find_one({"_id": "google_api"})["api_key"]  # NOTE(review): presumably a pymongo collection, where find_one returns None on no match; a missing "google_api" document would then raise TypeError at import time - confirm it is always seeded
+
 # Generate a secure random secret key
 secret_key = secrets.token_hex(32)  # Generates a 64-character hexadecimal string
 
@@ -27,8 +31,6 @@
     PERMANENT_SESSION_LIFETIME=timedelta(minutes=30),
 )
 
-GOOGLE_CLIENT_ID = '120137358324-l62fq2hlj9r31evvitg55rcl4rf21udd.apps.googleusercontent.com'
-
 # Test MongoDB connection
 try:
     resume_database = get_resume_database()
@@ -190,19 +192,19 @@ def interview_question_suggestion():
         3. What technologies and tools are used in the project.
         4. What technologies and tools are used to acquire the certification and awards.
         5. Work experience in the field of technology (if any).
-        
+
         Your response should be structured as follows, using the information you get from the resume:
         The idea you get the following questions, such as the project, technologies used, certification and awards:
         1. Question 1
         2. Question 2
         3. ...
-        
+
         For example, you can make suggestions like:
         AWS related questions:
         1. How do you use AWS services in your project?
         2. What do you know about AWS?
         3. What is the structure of your AWS environment?
-        
+
         Replace the questions with your own based on the information you get from the resume. 
         Follow this format for all categories of questions.
         Your response should contain only categorized questions. Do not include unrelated information.

diff --git a/be_repo/modules/job_recommendation_system.py b/be_repo/modules/job_recommendation_system.py
@@ -7,6 +7,15 @@
 from .resume_processor import ResumeProcessor
 from .retrieval_engine import RetrievalEngine
 from .view import CLIView
+from configs.database import get_key_database
+
+keys_db = get_key_database()
+keys_collection = keys_db["keys"]
+
+# Neo4j Connection Details
+NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"]  # Neo4j URI, looked up in the "keys" collection
+NEO4J_USERNAME = "neo4j"  # Neo4j username (hard-coded, not read from the "keys" collection)
+NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"]  # Neo4j password, looked up in the "keys" collection
 
 
 def job_recommend(resume_text, user_id):
@@ -19,11 +28,6 @@ def job_recommend(resume_text, user_id):
         logger.error(f'No resume text provided, user_id: {user_id}.')
         return 'Error: No resume text provided.'
 
-    # Neo4j Connection Details
-    NEO4J_URI = "neo4j+ssc://7bf5a48e.databases.neo4j.io"  # Replace with your Neo4j URI
-    NEO4J_USERNAME = "neo4j"  # Replace with your Neo4j username
-    NEO4J_PASSWORD = "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc"  # Replace with your Neo4j password
-
     # Initialize Model
     neo4j_model = Neo4jModel(
         uri=NEO4J_URI,

diff --git a/be_repo/modules/langgraph_qa.py b/be_repo/modules/langgraph_qa.py
@@ -23,8 +23,9 @@ def get_answer_from_langgraph(qa_graph, resume_text, user_state_collection, user
     events = qa_graph.stream(
         {"messages": [("user", question)]}, config, stream_mode="values"
     )
+    final_result = ''
     for event in events:
         if event["messages"][-1].type == "ai":
-            return event["messages"][-1].content
+            final_result = event["messages"][-1].content
 
-    return
+    return final_result
diff --git a/be_repo/modules/retrieval_engine.py b/be_repo/modules/retrieval_engine.py
@@ -1,10 +1,12 @@
 # retrieval_engine.py
 
-from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_neo4j import GraphCypherQAChain
+from langchain_openai import ChatOpenAI
 from langchain.chains.retrieval import create_retrieval_chain
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from configs.openai_key import get_openai_api_key  # New import
 from langchain.prompts import PromptTemplate
 
-
 class RetrievalEngine:
     def __init__(self, resume_processor, neo4j_model):
         """

diff --git a/be_repo/modules/verify.py b/be_repo/modules/verify.py
@@ -1,8 +1,17 @@
 from neo4j import GraphDatabase
+from configs.database import get_key_database
 
-uri = "neo4j+ssc://7bf5a48e.databases.neo4j.io"  # Update with your Neo4j URI
-username = "neo4j"             # Update with your username
-password = "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc"          # Update with your password
+keys_db = get_key_database()
+keys_collection = keys_db["keys"]
+
+# Neo4j Connection Details
+NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"]  # Neo4j URI, looked up in the "keys" collection
+NEO4J_USERNAME = "neo4j"  # Neo4j username (hard-coded, not read from the "keys" collection)
+NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"]  # Neo4j password, looked up in the "keys" collection
+
+uri = NEO4J_URI  # lowercase alias passed to GraphDatabase.driver below
+username = NEO4J_USERNAME  # first element of the driver's auth tuple
+password = NEO4J_PASSWORD  # second element of the driver's auth tuple
 
 driver = GraphDatabase.driver(uri, auth=(username, password))
 

diff --git a/be_repo/modules/view.py b/be_repo/modules/view.py
@@ -28,8 +28,8 @@ def display_recommendations(self, recommendations):
         Display job recommendations to the user.
         """
         if not recommendations:
-            return 'No job recommendations found based on your resume.'
-        res = '\nRecommended Jobs for You:\n'
+            return "No job recommendations found based on your resume."
+        res = "\nRecommended Jobs for You:\n"
         for idx, job in enumerate(recommendations, start=1):
-            res += f'{idx}. {job}\n'
+            res += f"{idx}. {job}\n"
         return res
diff --git a/be_repo/preprocess/neo4j_import.py b/be_repo/preprocess/neo4j_import.py
@@ -6,6 +6,15 @@
 import os
 from tqdm import tqdm
 import logging
+from configs.database import get_key_database
+
+keys_db = get_key_database()
+keys_collection = keys_db["keys"]
+
+# Neo4j Connection Details
+NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"]  # Neo4j URI, looked up in the "keys" collection
+NEO4J_USERNAME = "neo4j"  # Neo4j username (hard-coded, not read from the "keys" collection)
+NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"]  # Neo4j password, looked up in the "keys" collection
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -29,8 +38,8 @@
 }
 
 # Neo4j connection details from environment variables
-uri = "neo4j+ssc://7bf5a48e.databases.neo4j.io"
-AUTH = ("neo4j", "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc")
+uri = NEO4J_URI
+AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)
 
 # Initialize Neo4j driver
 driver = GraphDatabase.driver(uri, auth=AUTH)
@@ -44,6 +53,7 @@
     driver.close()
     exit(1)
 
+
 # Function to load node CSV files into DataFrames
 def load_node_dataframes(csv_dir, node_types):
     node_dfs = {}
@@ -57,6 +67,7 @@ def load_node_dataframes(csv_dir, node_types):
             logger.warning(f"CSV file for node type '{node_type}' not found in '{csv_dir}'.")
     return node_dfs
 
+
 # Function to load relationships CSV file into a DataFrame
 def load_relationships_data(csv_dir):
     relationships_file = os.path.join(csv_dir, 'relationships.csv')
@@ -68,6 +79,7 @@ def load_relationships_data(csv_dir):
         logger.warning(f"Relationships CSV file not found in '{csv_dir}'.")
         return None
 
+
 # Function to create constraints
 def create_constraints(driver):
     constraints = [
@@ -91,12 +103,15 @@ def create_constraints(driver):
                 logger.error(f"Failed to execute constraint '{constraint}': {e}")
     logger.info("Constraints created or already exist.")
 
+
 def standardize_relationship_types(df):
     if 'relationship_type' in df.columns:
         original_types = df['relationship_type'].unique()
-        df['relationship_type'] = df['relationship_type'].str.upper().str.replace(' ', '_').str.replace('[^A-Z0-9_]', '', regex=True)
+        df['relationship_type'] = df['relationship_type'].str.upper().str.replace(' ', '_').str.replace('[^A-Z0-9_]',
+                                                                                                        '', regex=True)  # upper-case, spaces -> underscores, then drop every character outside A-Z, 0-9 and "_"
         standardized_types = df['relationship_type'].unique()
-        logger.info(f"Standardized relationship types from {len(original_types)} to {len(standardized_types)} unique types.")
+        logger.info(
+            f"Standardized relationship types from {len(original_types)} to {len(standardized_types)} unique types.")
     return df
 
 
@@ -116,12 +131,13 @@ def import_nodes_in_batches(tx, node_type, df, batch_size=1000):
         df['embedding'] = df['embedding'].apply(lambda x: json.loads(x) if pd.notnull(x) else [])
     data = df.to_dict('records')
     for i in tqdm(range(0, len(data), batch_size), desc=f"Importing {node_type} in batches"):
-        batch = data[i:i+batch_size]
+        batch = data[i:i + batch_size]
         try:
             tx.run(query, rows=batch)
-            logger.info(f"Imported batch {i//batch_size + 1} for node type '{node_type}'.")
+            logger.info(f"Imported batch {i // batch_size + 1} for node type '{node_type}'.")
         except Exception as e:
-            logger.error(f"Error importing batch {i//batch_size + 1} for node type '{node_type}': {e}")
+            logger.error(f"Error importing batch {i // batch_size + 1} for node type '{node_type}': {e}")
+
 
 # Function to create a mapping from ID to node type
 def create_id_to_type_mapping(node_dfs):
@@ -135,6 +151,7 @@ def create_id_to_type_mapping(node_dfs):
     logger.info("Created ID to node type mapping.")
     return id_to_type
 
+
 # Function to infer node types for relationships
 def infer_node_types(rel_df, id_to_type):
     rel_df['start_node_type'] = rel_df['start_node_id'].apply(lambda x: id_to_type.get(int(x), 'Unknown'))
@@ -149,10 +166,11 @@ def infer_node_types(rel_df, id_to_type):
         logger.warning(unknown_end)
     return rel_df
 
+
 def import_relationships_in_batches(tx, df, batch_size=1000):
     data = df.to_dict('records')
     for i in tqdm(range(0, len(data), batch_size), desc="Importing relationships in batches"):
-        batch = data[i:i+batch_size]
+        batch = data[i:i + batch_size]
         unwind_data = [
             {
                 "start_id": int(rel['start_node_id']),
@@ -170,9 +188,9 @@ def import_relationships_in_batches(tx, df, batch_size=1000):
         """
         try:
             tx.run(query, rows=unwind_data)
-            logger.info(f"Imported batch {i//batch_size + 1} of relationships.")
+            logger.info(f"Imported batch {i // batch_size + 1} of relationships.")
         except Exception as e:
-            logger.error(f"Error importing batch {i//batch_size + 1} of relationships: {e}")
+            logger.error(f"Error importing batch {i // batch_size + 1} of relationships: {e}")
 
 
 # Main function to perform the import
@@ -198,25 +216,25 @@ def main():
     if relationship_df is not None:
         # Standardize relationship types
         relationship_df = standardize_relationship_types(relationship_df)
-        
+
         # Infer node types if not present
         if 'start_node_type' not in relationship_df.columns or 'end_node_type' not in relationship_df.columns:
             logger.info("Inferring 'start_node_type' and 'end_node_type' based on node IDs...")
             relationship_df = infer_node_types(relationship_df, id_to_type)
 
         # Check for unknown node types
         unknown_rels = relationship_df[
-            (relationship_df['start_node_type'] == 'Unknown') | 
+            (relationship_df['start_node_type'] == 'Unknown') |
             (relationship_df['end_node_type'] == 'Unknown')
-        ]
+            ]
         if not unknown_rels.empty:
             logger.error("Some relationships have unknown node types. Please verify your data.")
             logger.error(unknown_rels)
             # Skip unknown relationships
             relationship_df = relationship_df[
-                (relationship_df['start_node_type'] != 'Unknown') & 
+                (relationship_df['start_node_type'] != 'Unknown') &
                 (relationship_df['end_node_type'] != 'Unknown')
-            ]
+                ]
 
         # Import relationships
         with driver.session() as session:
@@ -229,5 +247,6 @@ def main():
     driver.close()
     logger.info("Neo4j import completed.")
 
+
 if __name__ == "__main__":
     main()
diff --git a/fe_repo/vite.config.ts b/fe_repo/vite.config.ts
@@ -14,7 +14,7 @@ export default defineConfig(({command}) => {
     },
     plugins: [react()],
     define: {
-      __CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.23.225.150' : 'http://localhost')
+      __CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.75.46.217' : 'http://localhost')
     }
   };
 })