Skip to content

Commit 90a6591

Browse files
authored
Merge pull request #537 from ZeeShekh1908/main
fixed ai assistant and summarizer issue
2 parents 08fdb29 + fa5001a commit 90a6591

File tree

3 files changed

+318
-503
lines changed

3 files changed

+318
-503
lines changed

Research_Paper_Summary.py

Lines changed: 100 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -1,128 +1,124 @@
11
import streamlit as st
2+
import PyPDF2
23
from gtts import gTTS
3-
import fitz
44
import tempfile
5+
import matplotlib.pyplot as plt
6+
from collections import Counter
7+
import re
8+
import io
59

10+
# ------------------- APP CONFIG -------------------
11+
st.set_page_config(page_title="AI Research Paper Assistant", page_icon="📄", layout="wide")
612

7-
# APP CONFIG
8-
st.set_page_config(page_title="AI Research Paper Assistant", page_icon="📄", layout="centered")
9-
10-
11-
# CUSTOM CSS
12-
st.markdown(
13-
"""
14-
<style>
15-
/* Background */
16-
.stApp { background-color: #9fcbf5 !important; color: black !important; }
17-
18-
/* Global text */
19-
h1, h2, h3, h4, h5, h6, p, label, span { color: black !important; }
20-
21-
/* Title */
22-
.title-card {
23-
background-color: white;
24-
padding: 20px;
25-
border-radius: 15px;
26-
box-shadow: 0px 4px 8px rgba(0,0,0,0.08);
27-
text-align: center;
28-
margin-bottom: 20px;
29-
color: black !important;
30-
}
31-
32-
/* File uploader box */
33-
div[data-testid="stFileUploader"] {
34-
border: 2px dashed #4a90e2 !important;
35-
background-color: #f5f2f2 !important;
36-
border-radius: 12px;
37-
padding: 30px;
38-
}
39-
40-
/* Buttons */
41-
div.stButton > button { border-radius: 8px; padding: 10px 22px; font-weight: 600; border: none; }
42-
43-
/* Upload & Summarize button */
44-
div.stButton > button:first-child {
45-
background: linear-gradient(90deg, #2f80ed, #56ccf2) !important;
46-
color: white !important;
47-
}
48-
49-
/* Back button */
50-
div.stButton > button[kind="secondary"], div.stButton > button:nth-child(2) {
51-
background-color: #e6e9f5 !important;
52-
color: black !important;
53-
}
54-
</style>
55-
""",
56-
unsafe_allow_html=True
57-
)
58-
59-
60-
# SESSION STATE INIT
61-
if "uploaded_file" not in st.session_state:
62-
st.session_state.uploaded_file = None
13+
# ------------------- SESSION STATE -------------------
6314
if "summary" not in st.session_state:
6415
st.session_state.summary = ""
16+
if "detailed_summary" not in st.session_state:
17+
st.session_state.detailed_summary = ""
6518
if "audio_file" not in st.session_state:
6619
st.session_state.audio_file = None
20+
if "pdf_text" not in st.session_state:
21+
st.session_state.pdf_text = ""
22+
if "chat_history" not in st.session_state:
23+
st.session_state.chat_history = []
6724

68-
69-
# FUNCTIONS
70-
def extract_text_from_pdf(pdf_file):
25+
# ------------------- FUNCTIONS -------------------
26+
def extract_text_from_pdf(uploaded_file):
7127
text = ""
72-
pdf_document = fitz.open(stream=pdf_file.read(), filetype="pdf")
73-
for page_num in range(len(pdf_document)):
74-
text += pdf_document[page_num].get_text()
28+
reader = PyPDF2.PdfReader(uploaded_file)
29+
for page in reader.pages:
30+
page_text = page.extract_text() or ""
31+
text += page_text
7532
return text
7633

77-
def generate_summary(text):
78-
return text[:900] + "..." if len(text) > 900 else text
34+
def generate_summaries(text, tldr_chars=300, detailed_chars=1000):
    """Return placeholder ``(tldr, detailed)`` summaries of *text*.

    Simulated AI summaries (replace with an LLM API later): each summary is
    just a prefix of *text*, followed by ``"..."`` only when *text* is longer
    than the corresponding limit.

    Args:
        text: Full text to summarise.
        tldr_chars: Maximum number of characters kept in the short summary.
        detailed_chars: Maximum number of characters kept in the detailed
            summary.

    Returns:
        A ``(tldr, detailed)`` tuple of strings.
    """
    def _truncate(limit):
        # Only mark the text as cut when something was actually dropped.
        return text[:limit] + "..." if len(text) > limit else text

    return _truncate(tldr_chars), _truncate(detailed_chars)
39+
40+
def generate_visualizations(text):
    """Render a bar chart of the most frequent long words in *text*.

    Words are the lower-cased word tokens of *text* that are longer than four
    characters; the five most common ones are plotted. When no such word
    exists, a placeholder message is drawn instead of a bar chart.

    Args:
        text: Text to analyse.

    Returns:
        An ``io.BytesIO`` rewound to offset 0 that holds the chart as PNG.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    counter = Counter(w for w in words if len(w) > 4)
    most_common = counter.most_common(5)

    fig, ax = plt.subplots()
    try:
        if most_common:
            labels, values = zip(*most_common)
            ax.bar(labels, values, color="skyblue")
        else:
            # zip(*[]) produces nothing to unpack and would raise ValueError,
            # so draw an explicit placeholder for keyword-free text.
            ax.text(0.5, 0.5, "No keywords found",
                    ha="center", va="center", transform=ax.transAxes)
        ax.set_title("Top Keywords in Paper")
        ax.set_ylabel("Frequency")

        buf = io.BytesIO()
        # Save this figure explicitly rather than pyplot's "current" figure.
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps every figure alive until closed; without this each
        # call would leak a figure.
        plt.close(fig)

    buf.seek(0)
    return buf
7957

8058
def text_to_speech(text):
    """Synthesize *text* as English speech and save it to a temporary MP3.

    Args:
        text: Text to be spoken.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    tts = gTTS(text=text, lang="en")
    # Reserve a unique path, then close the handle before gTTS writes to it:
    # the original left the handle open, leaking a file descriptor (and an
    # open handle can block a second writer on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)
    return audio_path
8563

64+
def chat_with_ai(user_input):
65+
# Simulated chatbot response
66+
response = f"This is a simulated answer to: '{user_input}'. Replace with AI model API."
67+
return response
8668

87-
# TITLE
88-
st.markdown("<div class='title-card'><h2>📑 Upload Your Research Paper</h2><p>Upload a PDF to get a concise summary</p></div>", unsafe_allow_html=True)
89-
90-
91-
# MAIN CARD
92-
st.markdown("<div class='main-card'>", unsafe_allow_html=True)
93-
uploaded_file = st.file_uploader("Choose PDF File", type=["pdf"])
94-
95-
# Buttons
96-
col1, col2 = st.columns([1, 2])
97-
with col1:
98-
summarize_btn = st.button("📑 Upload & Summarize")
99-
with col2:
100-
home_btn = st.button("⬅️ Back to Home")
101-
st.markdown("</div>", unsafe_allow_html=True)
102-
103-
104-
# BUTTON LOGIC
105-
if home_btn:
106-
st.session_state.uploaded_file = None
107-
st.session_state.summary = ""
108-
st.session_state.audio_file = None
109-
st.info("Back to Home clicked!")
69+
# ------------------- UI HEADER -------------------
70+
st.title("📄 AI Research Paper Assistant")
71+
st.markdown("Upload a PDF, explore summaries, keyword charts, and chat with an AI assistant.")
11072

73+
uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])
11174

112-
# Upload & Summarize button
113-
if summarize_btn and uploaded_file:
114-
with st.spinner("Extracting & summarizing..."):
75+
# ------------------- PROCESS PDF -------------------
76+
if uploaded_file and st.button("🔍 Process PDF"):
77+
with st.spinner("Reading and summarizing your PDF..."):
11578
text = extract_text_from_pdf(uploaded_file)
116-
st.session_state.summary = generate_summary(text)
117-
st.session_state.audio_file = text_to_speech(st.session_state.summary)
118-
st.success("✅ Summary Generated!")
119-
120-
121-
# DISPLAY SUMMARY
122-
if st.session_state.summary:
123-
st.markdown("<div class='main-card'>", unsafe_allow_html=True)
124-
st.text_area("Summary", st.session_state.summary, height=250)
125-
if st.button("🔊 Read Summary Aloud"):
126-
if st.session_state.audio_file:
127-
st.audio(st.session_state.audio_file, format="audio/mp3")
128-
st.markdown("</div>", unsafe_allow_html=True)
79+
st.session_state.pdf_text = text
80+
tldr, detailed = generate_summaries(text)
81+
st.session_state.summary = tldr
82+
st.session_state.detailed_summary = detailed
83+
st.session_state.audio_file = text_to_speech(tldr)
84+
st.success("✅ PDF processed successfully!")
85+
86+
# ------------------- TABS -------------------
87+
if st.session_state.pdf_text:
88+
tab1, tab2, tab3 = st.tabs(["🧠 Summary", "📊 Visuals", "🤖 AI Assistant"])
89+
90+
# --- TAB 1: Summary ---
91+
with tab1:
92+
st.subheader("TL;DR Summary")
93+
st.write(st.session_state.summary)
94+
95+
st.subheader("Detailed Summary")
96+
st.write(st.session_state.detailed_summary)
97+
98+
if st.button("🔊 Read Summary Aloud"):
99+
if st.session_state.audio_file:
100+
st.audio(st.session_state.audio_file, format="audio/mp3")
101+
102+
# --- TAB 2: Visuals ---
103+
with tab2:
104+
st.subheader("Keyword Analysis")
105+
chart = generate_visualizations(st.session_state.pdf_text)
106+
st.image(chart, caption="Top Keywords in Paper")
107+
108+
# --- TAB 3: AI Assistant ---
109+
with tab3:
110+
st.subheader("Chat with AI Assistant")
111+
user_query = st.text_input("Ask a question about this paper")
112+
if st.button("💬 Send"):
113+
if user_query.strip():
114+
ai_response = chat_with_ai(user_query)
115+
st.session_state.chat_history.append(("You", user_query))
116+
st.session_state.chat_history.append(("AI", ai_response))
117+
118+
for sender, msg in st.session_state.chat_history:
119+
if sender == "You":
120+
st.markdown(f"**🧑 You:** {msg}")
121+
else:
122+
st.markdown(f"**🤖 AI:** {msg}")
123+
else:
124+
st.info("📂 Upload a PDF and click **Process PDF** to begin.")

manage.py

Lines changed: 90 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,112 @@
11
from flask import Flask, request, jsonify, send_from_directory
22
from flask_cors import CORS
3-
import PyPDF2
4-
import re
3+
import fitz # PyMuPDF
54
import os
65
import webbrowser
6+
import PyPDF2
7+
import re
8+
from collections import Counter
9+
import google.generativeai as genai
10+
11+
# -----------------------------
12+
# Load Gemini API Key
13+
# -----------------------------
14+
GEMINI_KEY = os.getenv("GEMINI_API_KEY")
15+
if not GEMINI_KEY:
16+
print("❌ Gemini API key not found! Set it with: setx GEMINI_API_KEY \"your_key_here\"")
17+
else:
18+
print("✅ Gemini API key loaded")
19+
genai.configure(api_key=GEMINI_KEY)
720

21+
# -----------------------------
22+
# Flask App
23+
# -----------------------------
824
app = Flask(__name__, static_folder=".", static_url_path="")
9-
CORS(app)
25+
CORS(app, resources={r"/*": {"origins": "*"}})
26+
27+
pdf_text_context = "" # store extracted PDF text globally
1028

11-
@app.route('/extract_metadata', methods=['POST'])
12-
def extract_metadata():
29+
# -----------------------------
30+
# Helper functions
31+
# -----------------------------
32+
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF using PyMuPDF.

    Args:
        file: Binary file-like object; its ``read()`` result is passed to
            ``fitz.open`` as the PDF byte stream.

    Returns:
        The text of all pages concatenated in page order.
    """
    # The context manager closes the document even if a page fails to parse;
    # the original never closed it.
    with fitz.open(stream=file.read(), filetype="pdf") as pdf_document:
        return "".join(page.get_text() for page in pdf_document)
38+
39+
def generate_summary(text, max_chars=900):
    """Return a naive summary: the first *max_chars* characters of *text*.

    Args:
        text: Full text to summarise.
        max_chars: Maximum number of characters kept from *text*.

    Returns:
        *text* unchanged when it fits within *max_chars*; otherwise its
        first *max_chars* characters followed by ``"..."``.
    """
    if len(text) <= max_chars:
        return text
    return text[:max_chars] + "..."
41+
42+
def extract_keywords(text, top_n=5, min_length=5):
    """Return the most frequent long words of *text* with their counts.

    The text is lower-cased and split into word tokens; tokens shorter than
    *min_length* characters are ignored.

    Args:
        text: Text to analyse.
        top_n: Maximum number of keywords to return.
        min_length: Minimum number of characters a word needs to be counted.
            The default of 5 matches the original "longer than four" rule.

    Returns:
        A list of ``{"word": str, "count": int}`` dicts, most frequent first.
        Empty when no word qualifies.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    counts = Counter(w for w in words if len(w) >= min_length)
    return [{"word": w, "count": c} for w, c in counts.most_common(top_n)]
46+
47+
# -----------------------------
48+
# Routes
49+
# -----------------------------
50+
@app.route('/summarize', methods=['POST'])
51+
def summarize():
52+
global pdf_text_context
1353
try:
1454
file = request.files['file']
15-
16-
# Read PDF
1755
reader = PyPDF2.PdfReader(file)
18-
info = reader.metadata or {}
19-
20-
# Extract fields safely
21-
title = info.get("/Title", "") or ""
22-
authors = info.get("/Author", "") or ""
23-
journal = info.get("/Creator", "") or ""
24-
keywords = info.get("/Keywords", "") or ""
25-
26-
# Extract year
27-
year = ""
28-
if "/doi" in info:
29-
match = re.search(r"(19|20)\d{2}", info["/doi"])
30-
if match:
31-
year = match.group(0)
32-
elif "/ModDate" in info:
33-
match = re.search(r"(19|20)\d{2}", info["/ModDate"])
34-
if match:
35-
year = match.group(0)
36-
37-
metadata = {
38-
"title": title,
39-
"authors": authors,
40-
"year": year,
41-
"journal": journal,
42-
"keywords": keywords
43-
}
44-
45-
if not any(metadata.values()):
46-
return jsonify({"error": "Unable to fetch metadata, please fill manually"}), 200
47-
48-
return jsonify(metadata)
49-
56+
text = "".join([p.extract_text() or "" for p in reader.pages])
57+
pdf_text_context = text
58+
59+
summary = generate_summary(text)
60+
keywords = extract_keywords(text)
61+
62+
return jsonify({"summary": summary, "keywords": keywords, "full_text": text})
5063
except Exception as e:
64+
print("Error in /summarize:", e)
5165
return jsonify({"error": str(e)}), 500
5266

67+
@app.route('/chat', methods=['POST'])
def chat():
    """Answer a question about the most recently summarized PDF via Gemini.

    Expects a JSON object body with a non-empty string ``"message"``. The
    first 3000 characters of the module-level ``pdf_text_context`` are sent
    to the model as context together with the question.

    Returns:
        ``{"response": ...}`` on success; ``{"error": ...}`` with status 400
        for a malformed request or missing PDF context, and status 500 when
        the API key is not configured or the model call fails.
    """
    # silent=True returns None for a missing/invalid JSON body instead of
    # raising, so malformed requests get a clean 400 rather than failing on
    # `data.get` below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    user_message = data.get("message", "")
    if not isinstance(user_message, str) or not user_message.strip():
        # Do not spend a model call on an empty question.
        return jsonify({"error": "Message must be a non-empty string."}), 400

    if not pdf_text_context:
        return jsonify({"error": "No PDF context available. Upload a PDF first."}), 400
    if not GEMINI_KEY:
        return jsonify({"error": "Gemini API key not configured!"}), 500

    try:
        prompt = f"""
You are an AI research assistant. Use the following content to answer the question.

Content:
{pdf_text_context[:3000]}

Question: {user_message}
"""

        model = genai.GenerativeModel("gemini-2.5-flash")
        response = model.generate_content(prompt)

        ai_response = getattr(response, "text", None)
        if not ai_response:
            return jsonify({"response": "⚠️ Gemini returned no text. Check logs for details."})

        return jsonify({"response": ai_response})
    except Exception as e:
        # Route boundary: report the failure to the client instead of crashing.
        print("Error in /chat:", e)
        return jsonify({"error": str(e)}), 500
53100

54-
# Serve index.html directly
55101
@app.route("/")
56102
def index():
57103
return send_from_directory(os.getcwd(), "index.html")
58104

59-
105+
# -----------------------------
106+
# Run the app
107+
# -----------------------------
60108
if __name__ == "__main__":
    port = 5000
    url = f"http://localhost:{port}/"

    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute code. Make it opt-in via FLASK_DEBUG
    # and never combine it with a bind on all interfaces.
    debug = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    host = "127.0.0.1" if debug else "0.0.0.0"

    # With the reloader active this script runs a second time in a child
    # process (WERKZEUG_RUN_MAIN == "true"); open the browser only once.
    if os.environ.get("WERKZEUG_RUN_MAIN") != "true":
        webbrowser.open(url)

    app.run(host=host, port=port, debug=debug)

0 commit comments

Comments
 (0)