Skip to content

Commit edccbb4

Browse files
committed
FEAT: Add LinkedIn data retrieval and resolve test failures for data agents
1 parent 6e25168 commit edccbb4

File tree

6 files changed

+66
-15
lines changed

6 files changed

+66
-15
lines changed
14.6 KB
Binary file not shown.

src/agents/data_retrieval.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,53 @@
33

44
class GoogleSearchAgent:
    """Agent that scrapes a Google results page into a short text summary."""

    def get_google_data(self, query):
        """
        Retrieve summarized data from Google search results.

        Args:
            query: Free-text search term. It is percent-encoded before
                being placed in the URL.

        Returns:
            dict: ``{"google_summary": <space-joined anchor texts>}`` on
            success, or ``{"error": <message>}`` on any request failure.
        """
        from urllib.parse import quote_plus

        # Fix: encode the query so spaces, '&', '#', etc. cannot break the
        # URL or leak into extra query parameters.
        url = f"https://www.google.com/search?q={quote_plus(query)}"
        headers = {"User-Agent": "Mozilla/5.0"}
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            # Keep only anchors with visible text; the raw markup contains
            # many empty <a> tags.
            results = [a.text for a in soup.select("a") if a.text.strip()]
            return {"google_summary": " ".join(results[:5])}
        except requests.exceptions.RequestException as e:
            return {"error": f"Google Search error: {e}"}
1619

1720
class WikipediaAgent:
    """Agent that fetches a plain-text page summary from the Wikipedia REST API."""

    def get_wikipedia_data(self, query):
        """
        Retrieve summarized data from Wikipedia.

        Args:
            query: Page title to summarize. It is percent-encoded before
                being placed in the URL path.

        Returns:
            dict: ``{"wikipedia_summary": <extract text>}`` on success
            (falling back to "No summary available" when the API response
            has no ``extract`` field), or ``{"error": <message>}`` on any
            request failure.
        """
        from urllib.parse import quote

        # Fix: the REST API takes the title as a path segment, so it must
        # be percent-encoded (spaces, slashes, non-ASCII characters).
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{quote(query, safe='')}"
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return {"wikipedia_summary": data.get("extract", "No summary available")}
        except requests.exceptions.RequestException as e:
            return {"error": f"Wikipedia API error: {e}"}
33+
34+
class LinkedInSearchAgent:
    """Agent that scrapes a LinkedIn company-search page for subtitle snippets."""

    def get_linkedin_data(self, query):
        """
        Retrieve company-related information from LinkedIn.

        Args:
            query: Company search keywords. They are percent-encoded before
                being placed in the URL.

        Returns:
            dict: ``{"linkedin_summary": <joined subtitles>}`` (or the
            literal "No data available" when nothing matched), or
            ``{"error": <message>}`` on any request failure.
        """
        from urllib.parse import quote_plus

        # Fix: encode the keywords so spaces and reserved characters do not
        # corrupt the query string.
        url = (
            "https://www.linkedin.com/search/results/companies/"
            f"?keywords={quote_plus(query)}"
        )
        headers = {"User-Agent": "Mozilla/5.0"}
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            # NOTE(review): anonymous requests to LinkedIn are typically
            # served a login wall, so this selector may match nothing and
            # the method will return "No data available" — confirm against
            # a live response.
            results = [
                item.text.strip()
                for item in soup.select("div.entity-result__primary-subtitle")
            ]
            if results:
                return {"linkedin_summary": " ".join(results[:3])}
            else:
                return {"linkedin_summary": "No data available"}
        except requests.exceptions.RequestException as e:
            return {"error": f"LinkedIn Search error: {e}"}

src/agents/swot_analysis.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,23 @@ def __init__(self):
1111

1212
def analyze(self, data, company_name):
1313
openai.api_key = self.api_key
14+
15+
google_summary = data.get("google_summary", "No data available")
16+
wikipedia_summary = data.get("wikipedia_summary", "No data available")
17+
linkedin_summary = data.get("linkedin_summary", "No data available")
18+
1419
prompt = (
15-
f"Perform a detailed SWOT analysis for the company '{company_name}' based on the following information:\n\n"
16-
f"Google Summary:\n{data.get('google_summary', 'No data available')}\n\n"
17-
f"Wikipedia Summary:\n{data.get('wikipedia_summary', 'No data available')}\n\n"
20+
f"Perform a detailed SWOT analysis for the company '{company_name}' using the following data:\n\n"
21+
f"Google Summary:\n{google_summary}\n\n"
22+
f"Wikipedia Summary:\n{wikipedia_summary}\n\n"
23+
f"LinkedIn Summary:\n{linkedin_summary}\n\n"
1824
"Provide the analysis in the format:\n"
1925
"Strengths:\n- Point 1\n- Point 2\n\n"
2026
"Weaknesses:\n- Point 1\n- Point 2\n\n"
2127
"Opportunities:\n- Point 1\n- Point 2\n\n"
2228
"Threats:\n- Point 1\n- Point 2"
2329
)
30+
2431
try:
2532
response = openai.ChatCompletion.create(
2633
model="gpt-4",

src/orchestrator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from agents.data_retrieval import GoogleSearchAgent, WikipediaAgent
1+
from agents.data_retrieval import GoogleSearchAgent, WikipediaAgent, LinkedInSearchAgent
22
from utils.normalization import normalize_data
33
from agents.nlp_processing import NLPProcessingAgent
44
from agents.swot_analysis import SWOTAnalysisAgent
@@ -8,6 +8,7 @@ class Orchestrator:
88
def __init__(self):
99
self.google_agent = GoogleSearchAgent()
1010
self.wikipedia_agent = WikipediaAgent()
11+
self.linkedin_agent = LinkedInSearchAgent()
1112
self.nlp_agent = NLPProcessingAgent()
1213
self.swot_agent = SWOTAnalysisAgent()
1314
self.report_agent = ReportGenerator()
@@ -16,9 +17,10 @@ async def run(self, query):
1617
# Step 1: Retrieve data from alternative sources
1718
google_data = self.google_agent.get_google_data(query)
1819
wikipedia_data = self.wikipedia_agent.get_wikipedia_data(query)
20+
linkedin_data = self.linkedin_agent.get_linkedin_data(query)
1921

2022
# Step 2: Normalize data
21-
raw_data = {"google": google_data, "wikipedia": wikipedia_data}
23+
raw_data = {"google": google_data, "wikipedia": wikipedia_data, "linkedin": linkedin_data,}
2224
normalized_data = normalize_data(raw_data)
2325

2426
# Step 3: Process data

src/utils/normalization.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,20 @@ def normalize_data(raw_data):
66

77
google_data = raw_data.get("google", {})
88
if "error" not in google_data:
9-
normalized_data["google_summary"] = google_data.get("google_results", [])
9+
normalized_data["google_summary"] = google_data.get("google_summary", [])
1010
else:
1111
normalized_data["google_summary"] = google_data.get("error")
1212

1313
wikipedia_data = raw_data.get("wikipedia", {})
1414
if "error" not in wikipedia_data:
15-
normalized_data["wikipedia_summary"] = wikipedia_data.get("extract", "No summary available")
15+
normalized_data["wikipedia_summary"] = wikipedia_data.get("wikipedia_summary", "No summary available")
1616
else:
1717
normalized_data["wikipedia_summary"] = wikipedia_data.get("error")
1818

19+
linkedin_data = raw_data.get("linkedin", {})
20+
if "error" not in linkedin_data:
21+
normalized_data["linkedin_summary"] = linkedin_data.get("linkedin_summary", "No data available")
22+
else:
23+
normalized_data["linkedin_summary"] = linkedin_data.get("error")
24+
1925
return normalized_data

tests/test_data_retrieval.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1-
from src.agents.data_retrieval import GoogleSearchAgent, WikipediaAgent
1+
from src.agents.data_retrieval import GoogleSearchAgent, WikipediaAgent, LinkedInSearchAgent
22

33
def test_google_data_retrieval():
    """GoogleSearchAgent yields a non-empty summary for a known query."""
    searcher = GoogleSearchAgent()
    payload = searcher.get_google_data("OpenAI")
    assert "google_summary" in payload
    assert len(payload["google_summary"]) > 0
88

99
def test_wikipedia_data_retrieval():
    """WikipediaAgent yields a non-empty summary for a known page title."""
    searcher = WikipediaAgent()
    payload = searcher.get_wikipedia_data("OpenAI")
    assert "wikipedia_summary" in payload
    assert len(payload["wikipedia_summary"]) > 0
14+
15+
def test_linkedin_data_retrieval():
    """LinkedInSearchAgent yields a non-empty summary for a known company."""
    searcher = LinkedInSearchAgent()
    payload = searcher.get_linkedin_data("OpenAI")
    assert "linkedin_summary" in payload
    assert len(payload["linkedin_summary"]) > 0

0 commit comments

Comments
 (0)