Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci_e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
pnpm i
pnpm lint
pnpm build
pnpm run dev &
pnpm preview &

# Step 3: Start BE
- name: Set up and start backend
Expand All @@ -52,7 +52,7 @@ jobs:
# Step 4: Wait for BE and FE Services
- name: Wait for services to be ready
run: |
until curl -s http://localhost:3001; do
until curl -s http://localhost:8081; do
echo "Waiting for frontend to be ready..."
sleep 5
done
Expand Down
14 changes: 8 additions & 6 deletions be_repo/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,17 @@
from google.auth.transport import requests as google_requests
from google.oauth2 import id_token

from configs.database import get_resume_database, get_user_database
from configs.database import get_resume_database, get_user_database, get_key_database
from graphs.qa_graph import create_graph
from modules.evaluator import evaluate_resume, evaluate_resume_with_jd
from modules.job_recommendation_system import job_recommend
from modules.langgraph_qa import get_answer_from_langgraph
from modules.upload import upload_parse_resume

def _load_key(collection, key_id):
    """Return the ``api_key`` field of the document whose ``_id`` is *key_id*.

    Args:
        collection: the keys collection to query (must provide ``find_one``).
        key_id: value of the ``_id`` field identifying the stored key.

    Returns:
        The value stored under ``api_key`` in the matching document.

    Raises:
        KeyError: if no document matches or it has no ``api_key`` field.
    """
    document = collection.find_one({"_id": key_id})
    # Fail with a readable message instead of "'NoneType' object is not
    # subscriptable" when the entry has not been provisioned.
    if document is None or "api_key" not in document:
        raise KeyError(f"No 'api_key' stored for '{key_id}' in the keys collection.")
    return document["api_key"]


keys_db = get_key_database()
keys_collection = keys_db["keys"]

# Google OAuth client ID, read from the keys collection rather than hard-coded.
GOOGLE_CLIENT_ID = _load_key(keys_collection, "google_api")

# Generate a secure random secret key
secret_key = secrets.token_hex(32) # Generates a 64-character hexadecimal string

Expand All @@ -27,8 +31,6 @@
PERMANENT_SESSION_LIFETIME=timedelta(minutes=30),
)

GOOGLE_CLIENT_ID = '120137358324-l62fq2hlj9r31evvitg55rcl4rf21udd.apps.googleusercontent.com'

# Test MongoDB connection
try:
resume_database = get_resume_database()
Expand Down Expand Up @@ -190,19 +192,19 @@ def interview_question_suggestion():
3. What technologies and tools are used in the project.
4. What technologies and tools are used to acquire the certification and awards.
5. Work experience in the field of technology (if any).

Your response should be structured as follows, using the information you get from the resume:
The idea you get the following questions, such as the project, technologies used, certification and awards:
1. Question 1
2. Question 2
3. ...

For example, you can make suggestions like:
AWS related questions:
1. How do you use AWS services in your project?
2. What do you know about AWS?
3. What is the structure of your AWS environment?

Replace the questions with your own based on the information you get from the resume.
Follow this format for all categories of questions.
Your response should contain only categorized questions. Do not include unrelated information.
Expand Down
14 changes: 9 additions & 5 deletions be_repo/modules/job_recommendation_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
from .resume_processor import ResumeProcessor
from .retrieval_engine import RetrievalEngine
from .view import CLIView
from configs.database import get_key_database

def _load_key(collection, key_id):
    """Return the ``api_key`` field of the document whose ``_id`` is *key_id*.

    Args:
        collection: the keys collection to query (must provide ``find_one``).
        key_id: value of the ``_id`` field identifying the stored key.

    Returns:
        The value stored under ``api_key`` in the matching document.

    Raises:
        KeyError: if no document matches or it has no ``api_key`` field.
    """
    document = collection.find_one({"_id": key_id})
    # Fail with a readable message instead of "'NoneType' object is not
    # subscriptable" when the entry has not been provisioned.
    if document is None or "api_key" not in document:
        raise KeyError(f"No 'api_key' stored for '{key_id}' in the keys collection.")
    return document["api_key"]


keys_db = get_key_database()
keys_collection = keys_db["keys"]

# Neo4j connection details: URI and password come from the keys collection;
# only the username is a literal.
NEO4J_URI = _load_key(keys_collection, "NEO4J_URI")
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = _load_key(keys_collection, "NEO4J_PASSWORD")


def job_recommend(resume_text, user_id):
Expand All @@ -19,11 +28,6 @@ def job_recommend(resume_text, user_id):
logger.error(f'No resume text provided, user_id: {user_id}.')
return 'Error: No resume text provided.'

# Neo4j Connection Details
NEO4J_URI = "neo4j+ssc://7bf5a48e.databases.neo4j.io" # Replace with your Neo4j URI
NEO4J_USERNAME = "neo4j" # Replace with your Neo4j username
NEO4J_PASSWORD = "oxsK7V5_86emZlYQlvCfQHfVWS95wXz29OhtU8GAdFc" # Replace with your Neo4j password

# Initialize Model
neo4j_model = Neo4jModel(
uri=NEO4J_URI,
Expand Down
5 changes: 3 additions & 2 deletions be_repo/modules/langgraph_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ def get_answer_from_langgraph(qa_graph, resume_text, user_state_collection, user
events = qa_graph.stream(
{"messages": [("user", question)]}, config, stream_mode="values"
)
final_result = ''
for event in events:
if event["messages"][-1].type == "ai":
return event["messages"][-1].content
final_result = event["messages"][-1].content

return
return final_result
6 changes: 4 additions & 2 deletions be_repo/modules/retrieval_engine.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# retrieval_engine.py

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_neo4j import GraphCypherQAChain
from langchain_openai import ChatOpenAI
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from configs.openai_key import get_openai_api_key # New import
from langchain.prompts import PromptTemplate


class RetrievalEngine:
def __init__(self, resume_processor, neo4j_model):
"""
Expand Down
15 changes: 12 additions & 3 deletions be_repo/modules/verify.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
from neo4j import GraphDatabase
from configs.database import get_key_database

def _load_key(collection, key_id):
    """Return the ``api_key`` field of the document whose ``_id`` is *key_id*.

    Args:
        collection: the keys collection to query (must provide ``find_one``).
        key_id: value of the ``_id`` field identifying the stored key.

    Returns:
        The value stored under ``api_key`` in the matching document.

    Raises:
        KeyError: if no document matches or it has no ``api_key`` field.
    """
    document = collection.find_one({"_id": key_id})
    # Fail with a readable message instead of "'NoneType' object is not
    # subscriptable" when the entry has not been provisioned.
    if document is None or "api_key" not in document:
        raise KeyError(f"No 'api_key' stored for '{key_id}' in the keys collection.")
    return document["api_key"]


keys_db = get_key_database()
keys_collection = keys_db["keys"]

# Neo4j connection details: URI and password come from the keys collection;
# no credential literals are kept in this module.
NEO4J_URI = _load_key(keys_collection, "NEO4J_URI")
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = _load_key(keys_collection, "NEO4J_PASSWORD")

# Lower-case aliases kept so the rest of the script can keep using them.
uri = NEO4J_URI
username = NEO4J_USERNAME
password = NEO4J_PASSWORD

driver = GraphDatabase.driver(uri, auth=(username, password))

Expand Down
6 changes: 3 additions & 3 deletions be_repo/modules/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def display_recommendations(self, recommendations):
Display job recommendations to the user.
"""
if not recommendations:
return 'No job recommendations found based on your resume.'
res = '\nRecommended Jobs for You:\n'
return "No job recommendations found based on your resume."
res = "\nRecommended Jobs for You:\n"
for idx, job in enumerate(recommendations, start=1):
res += f'{idx}. {job}\n'
res += f"{idx}. {job}\n"
return res
49 changes: 34 additions & 15 deletions be_repo/preprocess/neo4j_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
import os
from tqdm import tqdm
import logging
from configs.database import get_key_database

def _load_key(collection, key_id):
    """Return the ``api_key`` field of the document whose ``_id`` is *key_id*.

    Args:
        collection: the keys collection to query (must provide ``find_one``).
        key_id: value of the ``_id`` field identifying the stored key.

    Returns:
        The value stored under ``api_key`` in the matching document.

    Raises:
        KeyError: if no document matches or it has no ``api_key`` field.
    """
    document = collection.find_one({"_id": key_id})
    # Fail with a readable message instead of "'NoneType' object is not
    # subscriptable" when the entry has not been provisioned.
    if document is None or "api_key" not in document:
        raise KeyError(f"No 'api_key' stored for '{key_id}' in the keys collection.")
    return document["api_key"]


keys_db = get_key_database()
keys_collection = keys_db["keys"]

# Neo4j connection details: URI and password come from the keys collection;
# only the username is a literal.
NEO4J_URI = _load_key(keys_collection, "NEO4J_URI")
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = _load_key(keys_collection, "NEO4J_PASSWORD")

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
Expand All @@ -29,8 +38,8 @@
}

# Neo4j connection details, taken from the NEO4J_* constants defined at the
# top of this module (no credential literals here).
uri = NEO4J_URI
AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

# Initialize Neo4j driver
driver = GraphDatabase.driver(uri, auth=AUTH)
Expand All @@ -44,6 +53,7 @@
driver.close()
exit(1)


# Function to load node CSV files into DataFrames
def load_node_dataframes(csv_dir, node_types):
node_dfs = {}
Expand All @@ -57,6 +67,7 @@ def load_node_dataframes(csv_dir, node_types):
logger.warning(f"CSV file for node type '{node_type}' not found in '{csv_dir}'.")
return node_dfs


# Function to load relationships CSV file into a DataFrame
def load_relationships_data(csv_dir):
relationships_file = os.path.join(csv_dir, 'relationships.csv')
Expand All @@ -68,6 +79,7 @@ def load_relationships_data(csv_dir):
logger.warning(f"Relationships CSV file not found in '{csv_dir}'.")
return None


# Function to create constraints
def create_constraints(driver):
constraints = [
Expand All @@ -91,12 +103,15 @@ def create_constraints(driver):
logger.error(f"Failed to execute constraint '{constraint}': {e}")
logger.info("Constraints created or already exist.")


def standardize_relationship_types(df):
    """Normalize the ``relationship_type`` column to upper snake case.

    Each value is upper-cased, spaces are turned into underscores, and every
    character outside ``A-Z``, ``0-9`` and ``_`` is removed. The column is
    rewritten on ``df`` itself. A frame without a ``relationship_type``
    column is returned untouched.

    Args:
        df: pandas DataFrame holding the relationship rows.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'relationship_type' not in df.columns:
        return df

    original_types = df['relationship_type'].unique()
    df['relationship_type'] = (
        df['relationship_type']
        .str.upper()
        # Literal replacement; spelled out so it does not depend on the
        # pandas-version default of ``regex``.
        .str.replace(' ', '_', regex=False)
        .str.replace('[^A-Z0-9_]', '', regex=True)
    )
    standardized_types = df['relationship_type'].unique()
    logger.info(
        "Standardized relationship types from %d to %d unique types.",
        len(original_types),
        len(standardized_types),
    )
    return df


Expand All @@ -116,12 +131,13 @@ def import_nodes_in_batches(tx, node_type, df, batch_size=1000):
df['embedding'] = df['embedding'].apply(lambda x: json.loads(x) if pd.notnull(x) else [])
data = df.to_dict('records')
for i in tqdm(range(0, len(data), batch_size), desc=f"Importing {node_type} in batches"):
batch = data[i:i+batch_size]
batch = data[i:i + batch_size]
try:
tx.run(query, rows=batch)
logger.info(f"Imported batch {i//batch_size + 1} for node type '{node_type}'.")
logger.info(f"Imported batch {i // batch_size + 1} for node type '{node_type}'.")
except Exception as e:
logger.error(f"Error importing batch {i//batch_size + 1} for node type '{node_type}': {e}")
logger.error(f"Error importing batch {i // batch_size + 1} for node type '{node_type}': {e}")


# Function to create a mapping from ID to node type
def create_id_to_type_mapping(node_dfs):
Expand All @@ -135,6 +151,7 @@ def create_id_to_type_mapping(node_dfs):
logger.info("Created ID to node type mapping.")
return id_to_type


# Function to infer node types for relationships
def infer_node_types(rel_df, id_to_type):
rel_df['start_node_type'] = rel_df['start_node_id'].apply(lambda x: id_to_type.get(int(x), 'Unknown'))
Expand All @@ -149,10 +166,11 @@ def infer_node_types(rel_df, id_to_type):
logger.warning(unknown_end)
return rel_df


def import_relationships_in_batches(tx, df, batch_size=1000):
data = df.to_dict('records')
for i in tqdm(range(0, len(data), batch_size), desc="Importing relationships in batches"):
batch = data[i:i+batch_size]
batch = data[i:i + batch_size]
unwind_data = [
{
"start_id": int(rel['start_node_id']),
Expand All @@ -170,9 +188,9 @@ def import_relationships_in_batches(tx, df, batch_size=1000):
"""
try:
tx.run(query, rows=unwind_data)
logger.info(f"Imported batch {i//batch_size + 1} of relationships.")
logger.info(f"Imported batch {i // batch_size + 1} of relationships.")
except Exception as e:
logger.error(f"Error importing batch {i//batch_size + 1} of relationships: {e}")
logger.error(f"Error importing batch {i // batch_size + 1} of relationships: {e}")


# Main function to perform the import
Expand All @@ -198,25 +216,25 @@ def main():
if relationship_df is not None:
# Standardize relationship types
relationship_df = standardize_relationship_types(relationship_df)

# Infer node types if not present
if 'start_node_type' not in relationship_df.columns or 'end_node_type' not in relationship_df.columns:
logger.info("Inferring 'start_node_type' and 'end_node_type' based on node IDs...")
relationship_df = infer_node_types(relationship_df, id_to_type)

# Check for unknown node types
unknown_rels = relationship_df[
(relationship_df['start_node_type'] == 'Unknown') |
(relationship_df['start_node_type'] == 'Unknown') |
(relationship_df['end_node_type'] == 'Unknown')
]
]
if not unknown_rels.empty:
logger.error("Some relationships have unknown node types. Please verify your data.")
logger.error(unknown_rels)
# Skip unknown relationships
relationship_df = relationship_df[
(relationship_df['start_node_type'] != 'Unknown') &
(relationship_df['start_node_type'] != 'Unknown') &
(relationship_df['end_node_type'] != 'Unknown')
]
]

# Import relationships
with driver.session() as session:
Expand All @@ -229,5 +247,6 @@ def main():
driver.close()
logger.info("Neo4j import completed.")


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion fe_repo/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export default defineConfig(({command}) => {
},
plugins: [react()],
define: {
__CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.23.225.150' : 'http://localhost')
__CURRENT_URI__: JSON.stringify(command == 'serve' ? 'http://34.75.46.217' : 'http://localhost')
}
};
})
Loading