diff --git a/.github/workflows/ci_e2e.yml b/.github/workflows/ci_e2e.yml index 2e00c0a30..5e5680b68 100644 --- a/.github/workflows/ci_e2e.yml +++ b/.github/workflows/ci_e2e.yml @@ -40,7 +40,7 @@ jobs: pnpm i pnpm lint pnpm build - pnpm run dev & + pnpm run preview & # Step 3: Start BE - name: Set up and start backend @@ -52,7 +52,7 @@ jobs: # Step 4: Wair for BE and FE Services - name: Wait for services to be ready run: | - until curl -s http://localhost:3001; do + until curl -s http://localhost:8081; do echo "Waiting for frontend to be ready..." sleep 5 done diff --git a/be_repo/app.py b/be_repo/app.py index 1fd78d629..e20962162 100644 --- a/be_repo/app.py +++ b/be_repo/app.py @@ -6,13 +6,17 @@ from google.auth.transport import requests as google_requests from google.oauth2 import id_token -from configs.database import get_resume_database, get_user_database +from configs.database import get_resume_database, get_user_database, get_key_database from graphs.qa_graph import create_graph from modules.evaluator import evaluate_resume, evaluate_resume_with_jd from modules.job_recommendation_system import job_recommend from modules.langgraph_qa import get_answer_from_langgraph from modules.upload import upload_parse_resume +keys_db = get_key_database() +keys_collection = keys_db["keys"] +GOOGLE_CLIENT_ID = keys_collection.find_one({"_id": "google_api"})["api_key"] + # Generate a secure random secret key secret_key = secrets.token_hex(32) # Generates a 64-character hexadecimal string @@ -27,8 +31,6 @@ PERMANENT_SESSION_LIFETIME=timedelta(minutes=30), ) -GOOGLE_CLIENT_ID = '120137358324-l62fq2hlj9r31evvitg55rcl4rf21udd.apps.googleusercontent.com' - # Test MongoDB connection try: resume_database = get_resume_database() @@ -190,19 +192,19 @@ def interview_question_suggestion(): 3. What technologies and tools are used in the project. 4. What technologies and tools are used to acquire the certification and awards. 5. Work experience in the field of technology (if any). 
- + Your response should be structured as follows, using the information you get from the resume: The idea you get the following questions, such as the project, technologies used, certification and awards: 1. Question 1 2. Question 2 3. ... - + For example, you can make suggestions like: AWS related questions: 1. How do you use AWS services in your project? 2. What do you know about AWS? 3. What is the structure of your AWS environment? - + Replace the questions with your own based on the information you get from the resume. Follow this format for all categories of questions. Your response should contain only categorized questions. Do not include unrelated information. diff --git a/be_repo/modules/job_recommendation_system.py b/be_repo/modules/job_recommendation_system.py index 79f84e1d3..0466ed3ec 100644 --- a/be_repo/modules/job_recommendation_system.py +++ b/be_repo/modules/job_recommendation_system.py @@ -7,6 +7,15 @@ from .resume_processor import ResumeProcessor from .retrieval_engine import RetrievalEngine from .view import CLIView +from configs.database import get_key_database + +keys_db = get_key_database() +keys_collection = keys_db["keys"] + +# Neo4j Connection Details +NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"] # Neo4j URI loaded from the keys collection +NEO4J_USERNAME = "neo4j" # default Neo4j username +NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"] # Neo4j password loaded from the keys collection def job_recommend(resume_text, user_id): @@ -19,11 +28,6 @@ def job_recommend(resume_text, user_id): logger.error(f'No resume text provided, user_id: {user_id}.') return 'Error: No resume text provided.' 
- # Neo4j Connection Details - NEO4J_URI = "neo4j+ssc://7bf5a48e.databases.neo4j.io" # Replace with your Neo4j URI - NEO4J_USERNAME = "neo4j" # Replace with your Neo4j username - NEO4J_PASSWORD = "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc" # Replace with your Neo4j password - # Initialize Model neo4j_model = Neo4jModel( uri=NEO4J_URI, diff --git a/be_repo/modules/langgraph_qa.py b/be_repo/modules/langgraph_qa.py index 591de0828..61e514228 100644 --- a/be_repo/modules/langgraph_qa.py +++ b/be_repo/modules/langgraph_qa.py @@ -23,8 +23,9 @@ def get_answer_from_langgraph(qa_graph, resume_text, user_state_collection, user events = qa_graph.stream( {"messages": [("user", question)]}, config, stream_mode="values" ) + final_result = '' for event in events: if event["messages"][-1].type == "ai": - return event["messages"][-1].content + final_result = event["messages"][-1].content - return + return final_result diff --git a/be_repo/modules/retrieval_engine.py b/be_repo/modules/retrieval_engine.py index 46ab42bb3..d9d4765e7 100644 --- a/be_repo/modules/retrieval_engine.py +++ b/be_repo/modules/retrieval_engine.py @@ -1,10 +1,12 @@ # retrieval_engine.py -from langchain.chains.combine_documents import create_stuff_documents_chain +from langchain_neo4j import GraphCypherQAChain +from langchain_openai import ChatOpenAI from langchain.chains.retrieval import create_retrieval_chain +from langchain.chains.combine_documents import create_stuff_documents_chain +from configs.openai_key import get_openai_api_key # New import from langchain.prompts import PromptTemplate - class RetrievalEngine: def __init__(self, resume_processor, neo4j_model): """ diff --git a/be_repo/modules/verify.py b/be_repo/modules/verify.py index a5ec2e025..da034826c 100644 --- a/be_repo/modules/verify.py +++ b/be_repo/modules/verify.py @@ -1,8 +1,17 @@ from neo4j import GraphDatabase +from configs.database import get_key_database -uri = "neo4j+ssc://7bf5a48e.databases.neo4j.io" # Update with your Neo4j URI 
-username = "neo4j" # Update with your username -password = "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc" # Update with your password +keys_db = get_key_database() +keys_collection = keys_db["keys"] + +# Neo4j Connection Details +NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"] # Neo4j URI loaded from the keys collection +NEO4J_USERNAME = "neo4j" # default Neo4j username +NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"] # Neo4j password loaded from the keys collection + +uri = NEO4J_URI # legacy alias kept for the driver setup below +username = NEO4J_USERNAME # legacy alias kept for the driver setup below +password = NEO4J_PASSWORD # legacy alias kept for the driver setup below driver = GraphDatabase.driver(uri, auth=(username, password)) diff --git a/be_repo/modules/view.py b/be_repo/modules/view.py index f6bd1264b..d427efa2c 100644 --- a/be_repo/modules/view.py +++ b/be_repo/modules/view.py @@ -28,8 +28,8 @@ def display_recommendations(self, recommendations): Display job recommendations to the user. """ if not recommendations: - return 'No job recommendations found based on your resume.' - res = '\nRecommended Jobs for You:\n' + return "No job recommendations found based on your resume." + res = "\nRecommended Jobs for You:\n" for idx, job in enumerate(recommendations, start=1): - res += f'{idx}. {job}\n' + res += f"{idx}. 
{job}\n" return res diff --git a/be_repo/preprocess/neo4j_import.py b/be_repo/preprocess/neo4j_import.py index afffd1dec..41951e3da 100644 --- a/be_repo/preprocess/neo4j_import.py +++ b/be_repo/preprocess/neo4j_import.py @@ -6,6 +6,15 @@ import os from tqdm import tqdm import logging +from configs.database import get_key_database + +keys_db = get_key_database() +keys_collection = keys_db["keys"] + +# Neo4j Connection Details +NEO4J_URI = keys_collection.find_one({"_id": "NEO4J_URI"})["api_key"] # Neo4j URI loaded from the keys collection +NEO4J_USERNAME = "neo4j" # default Neo4j username +NEO4J_PASSWORD = keys_collection.find_one({"_id": "NEO4J_PASSWORD"})["api_key"] # Neo4j password loaded from the keys collection # Configure logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -29,8 +38,8 @@ } # Neo4j connection details from environment variables -uri = "neo4j+ssc://7bf5a48e.databases.neo4j.io" -AUTH = ("neo4j", "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc") +uri = NEO4J_URI +AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD) # Initialize Neo4j driver driver = GraphDatabase.driver(uri, auth=AUTH) @@ -44,6 +53,7 @@ driver.close() exit(1) + # Function to load node CSV files into DataFrames def load_node_dataframes(csv_dir, node_types): node_dfs = {} @@ -57,6 +67,7 @@ def load_node_dataframes(csv_dir, node_types): logger.warning(f"CSV file for node type '{node_type}' not found in '{csv_dir}'.") return node_dfs + # Function to load relationships CSV file into a DataFrame def load_relationships_data(csv_dir): relationships_file = os.path.join(csv_dir, 'relationships.csv') @@ -68,6 +79,7 @@ def load_relationships_data(csv_dir): logger.warning(f"Relationships CSV file not found in '{csv_dir}'.") return None + # Function to create constraints def create_constraints(driver): constraints = [ @@ -91,12 +103,15 @@ def create_constraints(driver): logger.error(f"Failed to execute constraint '{constraint}': {e}") logger.info("Constraints created or already 
exist.") + def standardize_relationship_types(df): if 'relationship_type' in df.columns: original_types = df['relationship_type'].unique() - df['relationship_type'] = df['relationship_type'].str.upper().str.replace(' ', '_').str.replace('[^A-Z0-9_]', '', regex=True) + df['relationship_type'] = df['relationship_type'].str.upper().str.replace(' ', '_').str.replace('[^A-Z0-9_]', + '', regex=True) standardized_types = df['relationship_type'].unique() - logger.info(f"Standardized relationship types from {len(original_types)} to {len(standardized_types)} unique types.") + logger.info( + f"Standardized relationship types from {len(original_types)} to {len(standardized_types)} unique types.") return df @@ -116,12 +131,13 @@ def import_nodes_in_batches(tx, node_type, df, batch_size=1000): df['embedding'] = df['embedding'].apply(lambda x: json.loads(x) if pd.notnull(x) else []) data = df.to_dict('records') for i in tqdm(range(0, len(data), batch_size), desc=f"Importing {node_type} in batches"): - batch = data[i:i+batch_size] + batch = data[i:i + batch_size] try: tx.run(query, rows=batch) - logger.info(f"Imported batch {i//batch_size + 1} for node type '{node_type}'.") + logger.info(f"Imported batch {i // batch_size + 1} for node type '{node_type}'.") except Exception as e: - logger.error(f"Error importing batch {i//batch_size + 1} for node type '{node_type}': {e}") + logger.error(f"Error importing batch {i // batch_size + 1} for node type '{node_type}': {e}") + # Function to create a mapping from ID to node type def create_id_to_type_mapping(node_dfs): @@ -135,6 +151,7 @@ def create_id_to_type_mapping(node_dfs): logger.info("Created ID to node type mapping.") return id_to_type + # Function to infer node types for relationships def infer_node_types(rel_df, id_to_type): rel_df['start_node_type'] = rel_df['start_node_id'].apply(lambda x: id_to_type.get(int(x), 'Unknown')) @@ -149,10 +166,11 @@ def infer_node_types(rel_df, id_to_type): logger.warning(unknown_end) return rel_df + 
def import_relationships_in_batches(tx, df, batch_size=1000): data = df.to_dict('records') for i in tqdm(range(0, len(data), batch_size), desc="Importing relationships in batches"): - batch = data[i:i+batch_size] + batch = data[i:i + batch_size] unwind_data = [ { "start_id": int(rel['start_node_id']), @@ -170,9 +188,9 @@ def import_relationships_in_batches(tx, df, batch_size=1000): """ try: tx.run(query, rows=unwind_data) - logger.info(f"Imported batch {i//batch_size + 1} of relationships.") + logger.info(f"Imported batch {i // batch_size + 1} of relationships.") except Exception as e: - logger.error(f"Error importing batch {i//batch_size + 1} of relationships: {e}") + logger.error(f"Error importing batch {i // batch_size + 1} of relationships: {e}") # Main function to perform the import @@ -198,7 +216,7 @@ def main(): if relationship_df is not None: # Standardize relationship types relationship_df = standardize_relationship_types(relationship_df) - + # Infer node types if not present if 'start_node_type' not in relationship_df.columns or 'end_node_type' not in relationship_df.columns: logger.info("Inferring 'start_node_type' and 'end_node_type' based on node IDs...") @@ -206,17 +224,17 @@ def main(): # Check for unknown node types unknown_rels = relationship_df[ - (relationship_df['start_node_type'] == 'Unknown') | + (relationship_df['start_node_type'] == 'Unknown') | (relationship_df['end_node_type'] == 'Unknown') - ] + ] if not unknown_rels.empty: logger.error("Some relationships have unknown node types. 
Please verify your data.") logger.error(unknown_rels) # Skip unknown relationships relationship_df = relationship_df[ - (relationship_df['start_node_type'] != 'Unknown') & + (relationship_df['start_node_type'] != 'Unknown') & (relationship_df['end_node_type'] != 'Unknown') - ] + ] # Import relationships with driver.session() as session: @@ -229,5 +247,6 @@ def main(): driver.close() logger.info("Neo4j import completed.") + if __name__ == "__main__": main() diff --git a/fe_repo/vite.config.ts b/fe_repo/vite.config.ts index b263e5e25..8dd075a72 100644 --- a/fe_repo/vite.config.ts +++ b/fe_repo/vite.config.ts @@ -14,7 +14,7 @@ export default defineConfig(({command}) => { }, plugins: [react()], define: { - __CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.23.225.150' : 'http://localhost') + __CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.75.46.217' : 'http://localhost') } }; })