|
1 | 1 | import streamlit as st
|
| 2 | +import PyPDF2 |
2 | 3 | from gtts import gTTS
|
3 |
| -import fitz |
4 | 4 | import tempfile
|
| 5 | +import matplotlib.pyplot as plt |
| 6 | +from collections import Counter |
| 7 | +import re |
| 8 | +import io |
5 | 9 |
|
# ------------------- APP CONFIG -------------------
# Browser-tab title and icon, plus the wide page layout.
st.set_page_config(
    page_title="AI Research Paper Assistant",
    page_icon="📄",
    layout="wide",
)
6 | 12 |
|
7 |
| -# APP CONFIG |
8 |
| -st.set_page_config(page_title="AI Research Paper Assistant", page_icon="📄", layout="centered") |
9 |
| - |
10 |
| - |
11 |
| -# CUSTOM CSS |
12 |
| -st.markdown( |
13 |
| - """ |
14 |
| - <style> |
15 |
| - /* Background */ |
16 |
| - .stApp { background-color: #9fcbf5 !important; color: black !important; } |
17 |
| -
|
18 |
| - /* Global text */ |
19 |
| - h1, h2, h3, h4, h5, h6, p, label, span { color: black !important; } |
20 |
| -
|
21 |
| - /* Title */ |
22 |
| - .title-card { |
23 |
| - background-color: white; |
24 |
| - padding: 20px; |
25 |
| - border-radius: 15px; |
26 |
| - box-shadow: 0px 4px 8px rgba(0,0,0,0.08); |
27 |
| - text-align: center; |
28 |
| - margin-bottom: 20px; |
29 |
| - color: black !important; |
30 |
| - } |
31 |
| -
|
32 |
| - /* File uploader box */ |
33 |
| - div[data-testid="stFileUploader"] { |
34 |
| - border: 2px dashed #4a90e2 !important; |
35 |
| - background-color: #f5f2f2 !important; |
36 |
| - border-radius: 12px; |
37 |
| - padding: 30px; |
38 |
| - } |
39 |
| -
|
40 |
| - /* Buttons */ |
41 |
| - div.stButton > button { border-radius: 8px; padding: 10px 22px; font-weight: 600; border: none; } |
42 |
| -
|
43 |
| - /* Upload & Summarize button */ |
44 |
| - div.stButton > button:first-child { |
45 |
| - background: linear-gradient(90deg, #2f80ed, #56ccf2) !important; |
46 |
| - color: white !important; |
47 |
| - } |
48 |
| -
|
49 |
| - /* Back button */ |
50 |
| - div.stButton > button[kind="secondary"], div.stButton > button:nth-child(2) { |
51 |
| - background-color: #e6e9f5 !important; |
52 |
| - color: black !important; |
53 |
| - } |
54 |
| - </style> |
55 |
| - """, |
56 |
| - unsafe_allow_html=True |
57 |
| -) |
58 |
| - |
59 |
| - |
60 |
| -# SESSION STATE INIT |
61 |
| -if "uploaded_file" not in st.session_state: |
62 |
| - st.session_state.uploaded_file = None |
# ------------------- SESSION STATE -------------------
# Seed every key the app reads later. Keys that already hold a value from an
# earlier rerun are left untouched, so processed results survive interaction.
for _state_key, _initial_value in (
    ("summary", ""),
    ("detailed_summary", ""),
    ("audio_file", None),
    ("pdf_text", ""),
    ("chat_history", []),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
67 | 24 |
|
68 |
| - |
69 |
| -# FUNCTIONS |
70 |
| -def extract_text_from_pdf(pdf_file): |
| 25 | +# ------------------- FUNCTIONS ------------------- |
def extract_text_from_pdf(uploaded_file):
    """Return the text of all pages of a PDF as one string.

    Args:
        uploaded_file: Whatever ``PyPDF2.PdfReader`` accepts as its source;
            in this app it is the object returned by ``st.file_uploader``.

    Returns:
        The text of every page that yielded any, joined with newlines.
        Pages whose ``extract_text()`` result is empty, ``None`` or only
        whitespace are skipped, so a PDF with no extractable text gives ``""``.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    page_texts = (page.extract_text() or "" for page in reader.pages)
    # The newline separator keeps the last word of one page from fusing with
    # the first word of the next, which would distort word-level analysis.
    return "\n".join(chunk for chunk in page_texts if chunk.strip())
|
76 | 33 |
|
77 |
| -def generate_summary(text): |
78 |
| - return text[:900] + "..." if len(text) > 900 else text |
def generate_summaries(text, tldr_chars=300, detailed_chars=1000):
    """Build a short and a long placeholder summary by truncating *text*.

    This is a stand-in for a real summarizer (replace with an LLM API later):
    each summary is simply a prefix of the input.

    Args:
        text: The full document text.
        tldr_chars: Maximum number of characters kept for the short summary.
        detailed_chars: Maximum number of characters kept for the long summary.

    Returns:
        A ``(tldr, detailed)`` tuple. Each element is *text* itself when it
        fits within its limit, otherwise the first ``limit`` characters
        followed by ``"..."``.

    Raises:
        ValueError: If either limit is negative.
    """
    if tldr_chars < 0 or detailed_chars < 0:
        raise ValueError("summary length limits must be non-negative")

    def _clip(limit):
        # The ellipsis is only appended when something was actually cut off.
        return text[:limit] + "..." if len(text) > limit else text

    return _clip(tldr_chars), _clip(detailed_chars)
| 39 | + |
def generate_visualizations(text):
    """Render a bar chart of the five most frequent long words in *text*.

    Words are lower-cased ``\\w+`` runs; only words longer than four
    characters are counted.

    Args:
        text: The document text to analyse. May be empty.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the chart as PNG
        data. When no word qualifies, the chart carries a
        "No keywords found" notice instead of bars.
    """
    words = re.findall(r'\b\w+\b', text.lower())
    keyword_counts = Counter(word for word in words if len(word) > 4)
    most_common = keyword_counts.most_common(5)

    buf = io.BytesIO()
    fig, ax = plt.subplots()
    try:
        if most_common:
            labels, values = zip(*most_common)
            ax.bar(labels, values, color="skyblue")
        else:
            # zip(*[]) yields nothing to unpack, so the empty case needs its
            # own branch rather than an empty bar call.
            ax.text(
                0.5,
                0.5,
                "No keywords found",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
        ax.set_title("Top Keywords in Paper")
        ax.set_ylabel("Frequency")
        # Save this specific figure rather than pyplot's implicit "current" one.
        fig.savefig(buf, format="png")
    finally:
        # pyplot keeps every figure alive until it is closed; this function
        # runs on each Streamlit rerun, so unclosed figures would pile up.
        plt.close(fig)

    buf.seek(0)
    return buf
79 | 57 |
|
def text_to_speech(text):
    """Synthesize *text* to an MP3 file with gTTS and return the file's path.

    Args:
        text: The English text to speak.

    Returns:
        The path of a newly created ``.mp3`` temporary file, or ``None`` when
        *text* is empty or only whitespace (gTTS rejects such input). The
        file is created with ``delete=False``; removing it is left to the
        caller.
    """
    if not text or not text.strip():
        return None

    # Reserve a unique path and close the handle immediately: leaving it open
    # leaks a descriptor and can block a second open of the file on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        audio_path = temp_file.name

    tts = gTTS(text=text, lang="en")
    tts.save(audio_path)
    return audio_path
|
85 | 63 |
|
def chat_with_ai(user_input):
    """Return a canned placeholder reply that echoes *user_input*.

    No model is called; the reply only quotes the question back and reminds
    the developer to plug in a real AI model API.
    """
    # Simulated chatbot response
    return f"This is a simulated answer to: '{user_input}'. Replace with AI model API."
86 | 68 |
|
87 |
| -# TITLE |
88 |
| -st.markdown("<div class='title-card'><h2>📑 Upload Your Research Paper</h2><p>Upload a PDF to get a concise summary</p></div>", unsafe_allow_html=True) |
89 |
| - |
90 |
| - |
91 |
| -# MAIN CARD |
92 |
| -st.markdown("<div class='main-card'>", unsafe_allow_html=True) |
93 |
| -uploaded_file = st.file_uploader("Choose PDF File", type=["pdf"]) |
94 |
| - |
95 |
| -# Buttons |
96 |
| -col1, col2 = st.columns([1, 2]) |
97 |
| -with col1: |
98 |
| - summarize_btn = st.button("📑 Upload & Summarize") |
99 |
| -with col2: |
100 |
| - home_btn = st.button("⬅️ Back to Home") |
101 |
| -st.markdown("</div>", unsafe_allow_html=True) |
102 |
| - |
103 |
| - |
104 |
| -# BUTTON LOGIC |
105 |
| -if home_btn: |
106 |
| - st.session_state.uploaded_file = None |
107 |
| - st.session_state.summary = "" |
108 |
| - st.session_state.audio_file = None |
109 |
| - st.info("Back to Home clicked!") |
# ------------------- UI HEADER -------------------
st.title("📄 AI Research Paper Assistant")
st.markdown("Upload a PDF, explore summaries, keyword charts, and chat with an AI assistant.")

uploaded_file = st.file_uploader("Upload PDF", type=["pdf"])

# ------------------- PROCESS PDF -------------------
# Runs only on the rerun triggered by the button click; results are stored in
# session state so they survive later reruns.
if uploaded_file and st.button("🔍 Process PDF"):
    with st.spinner("Reading and summarizing your PDF..."):
        try:
            text = extract_text_from_pdf(uploaded_file)
        except Exception as exc:  # UI boundary: report the failure, don't crash
            text = None
            st.error(f"❌ Could not read this PDF: {exc}")

        if text is not None and not text.strip():
            # Without this, the run would report success while the tabs stay
            # hidden because pdf_text is empty.
            st.warning("⚠️ No extractable text was found in this PDF.")
        elif text:
            st.session_state.pdf_text = text
            tldr, detailed = generate_summaries(text)
            st.session_state.summary = tldr
            st.session_state.detailed_summary = detailed
            # Earlier questions referred to the previously processed paper.
            st.session_state.chat_history = []
            try:
                st.session_state.audio_file = text_to_speech(tldr)
            except Exception as exc:  # narration is optional; keep the summaries
                st.session_state.audio_file = None
                st.warning(f"🔇 Audio narration is unavailable: {exc}")
            st.success("✅ PDF processed successfully!")
| 85 | + |
# ------------------- TABS -------------------
# Nothing has been processed yet: show the hint. Otherwise render the tabs.
if not st.session_state.pdf_text:
    st.info("📂 Upload a PDF and click **Process PDF** to begin.")
else:
    summary_tab, visuals_tab, assistant_tab = st.tabs(["🧠 Summary", "📊 Visuals", "🤖 AI Assistant"])

    # --- TAB 1: Summary ---
    with summary_tab:
        st.subheader("TL;DR Summary")
        st.write(st.session_state.summary)

        st.subheader("Detailed Summary")
        st.write(st.session_state.detailed_summary)

        # The button is always drawn; audio plays only when a file exists.
        if st.button("🔊 Read Summary Aloud") and st.session_state.audio_file:
            st.audio(st.session_state.audio_file, format="audio/mp3")

    # --- TAB 2: Visuals ---
    with visuals_tab:
        st.subheader("Keyword Analysis")
        keyword_chart = generate_visualizations(st.session_state.pdf_text)
        st.image(keyword_chart, caption="Top Keywords in Paper")

    # --- TAB 3: AI Assistant ---
    with assistant_tab:
        st.subheader("Chat with AI Assistant")
        user_query = st.text_input("Ask a question about this paper")
        # Blank questions are ignored.
        if st.button("💬 Send") and user_query.strip():
            ai_response = chat_with_ai(user_query)
            st.session_state.chat_history.append(("You", user_query))
            st.session_state.chat_history.append(("AI", ai_response))

        # Replay the whole conversation, oldest message first.
        for sender, msg in st.session_state.chat_history:
            speaker_label = "**🧑 You:**" if sender == "You" else "**🤖 AI:**"
            st.markdown(f"{speaker_label} {msg}")
0 commit comments