diff --git a/background.js b/background.js index 0c06e9e..1055a88 100755 --- a/background.js +++ b/background.js @@ -1,16 +1,256 @@ // background.js import { addToArchiveBox } from "./utils.js"; +import * as RedditHandler from "./reddit-handler.js"; -chrome.runtime.onMessage.addListener(async (message) => { - const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`; - console.log('i ArchiveBox Collector showing options.html', options_url); - if (message.action === 'openOptionsPage') { - await chrome.tabs.create({ url: options_url }); +// Debug configuration +const DEBUG_MODE = true; // Set to true to see debugging info + +// Configuration +const CONFIG = { + MAX_ENTRIES: 10000, // Maximum number of entries to store locally + STATUS_DISPLAY_TIME: 3000 // Time in ms to show status indicators +}; + +function debugLog(...args) { + if (DEBUG_MODE) { + console.log('[ArchiveBox Debug]', ...args); + } +} + +// State management - sites handlers registry +const siteHandlers = { + reddit: RedditHandler +}; + +// Content capture configuration +let captureEnabled = false; + +// Initialize background script +async function initialize() { + debugLog('Initializing background script'); + + // Load configuration + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + captureEnabled = !!enableScrollCapture; + + // Initialize site handlers + if (captureEnabled) { + debugLog('Content capture is enabled, initializing handlers'); + Object.values(siteHandlers).forEach(handler => { + if (typeof handler.initialize === 'function') { + handler.initialize(); + } + }); + } + + // Check all existing tabs to find any supported site tabs already open + chrome.tabs.query({}, (tabs) => { + if (captureEnabled) { + debugLog(`Found ${tabs.length} existing tabs, checking for supported sites`); + + // Check each tab for supported sites + tabs.forEach(tab => { + if (tab.url) { + Object.entries(siteHandlers).forEach(([site, handler]) => { + if 
(handler.shouldCaptureUrl && handler.shouldCaptureUrl(tab.url)) { + debugLog(`Found existing ${site} tab:`, tab.url); + if (handler.injectContentScript) { + handler.injectContentScript(tab.id); + } + } + }); + } + }); } }); + + debugLog('Background script initialized'); +} + +/** + * Listens for messages from content scripts and popup + */ +chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + debugLog('Message received:', message.type || message.action); + + // Handle opening options page + if (message.action === 'openOptionsPage') { + const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`; + debugLog('Opening options page:', options_url); + chrome.tabs.create({ url: options_url }); + } + + // Handle archivebox_add + if (message.type === 'archivebox_add') { + debugLog('ArchiveBox add request'); + addToArchiveBox(message.body, sendResponse, sendResponse); + return true; // Keep the message channel open for the async response + } + + // Handle content capture + if (message.type === 'capture') { + debugLog('Capture request received:', message.entry?.url); + + if (!captureEnabled) { + debugLog('Content capture is disabled, ignoring capture request'); + sendResponse({ success: false, reason: 'Capture disabled' }); + return true; + } + + // Reject malformed requests up front: a missing entry or url would otherwise + // throw inside the listener and the sender would never receive a response + if (!message.entry || !message.entry.url) { + debugLog('Invalid capture entry, ignoring capture request'); + sendResponse({ success: false, reason: 'Invalid entry' }); + return true; + } + + // Determine site handler based on URL or tags + const url = message.entry.url; + let handled = false; + + // Check if it's from Reddit (tags are optional, mirroring saveEntry's `entry.tags || []`) + if ((message.entry.tags || []).includes('reddit') || url.includes('reddit.com')) { + if (message.entry.priority === 'high') { + // Use high priority capture for viewport posts + RedditHandler.captureHighPriority(message.entry, sender.tab?.id); + } else { + // Let reddit handler decide what to do + RedditHandler.queueForCapture(message.entry, sender.tab?.id, 'normal'); + } + handled = true; + } + + // Generic handling for other sites or if no specific handler was found + if (!handled) { + saveEntry(message.entry); + } + + sendResponse({ success: true }); + } +
// Enable status requests + if (message.type === 'getEnableStatus') { + chrome.storage.local.get(['enableScrollCapture'], (result) => { + sendResponse({ enableScrollCapture: !!result.enableScrollCapture }); + }); + return true; // Keep the message channel open for async response + } + + // Show status notification in tabs + if (message.type === 'showStatus') { + const tabId = message.tabId || (sender.tab && sender.tab.id); + if (tabId) { + try { + showStatusInTab(tabId, message.message, message.count, message.immediate); + } catch (err) { + debugLog('Error showing status:', err); + } + } + sendResponse({ success: true }); + } + + // Get site handler stats + if (message.type === 'getStats') { + const stats = {}; + Object.entries(siteHandlers).forEach(([site, handler]) => { + if (handler.getStats) { + stats[site] = handler.getStats(); + } + }); + sendResponse({ stats }); + return true; + } + if (message.type === 'getSiteHandlerForUrl') { + try { + const url = message.url; + const handlerResult = findHandlerForUrl(url); + + if (handlerResult) { + const { id, handler } = handlerResult; + const handlers = getAllHandlers(); + const handlerInfo = handlers[id]; + + sendResponse({ + found: true, + handler: { + id, + name: handlerInfo.name, + description: handlerInfo.description, + version: handlerInfo.version + } + }); + } else { + sendResponse({ found: false }); + } + } catch (error) { + console.error('Error finding handler for URL:', error); + sendResponse({ found: false, error: error.message }); + } + return true; + } + + // Get all site handlers + if (message.type === 'getSiteHandlers') { + try { + const handlers = getAllHandlers(); + sendResponse({ handlers }); + } catch (error) { + console.error('Error getting site handlers:', error); + sendResponse({ handlers: {} }); + } + return true; + } + + // URL visited notification + if (message.type === 'urlVisited') { + try { + const url = message.url; + const handlerResult = findHandlerForUrl(url); + + if (handlerResult && 
typeof handlerResult.handler.onUrlVisited === 'function') { + handlerResult.handler.onUrlVisited(url); + } + + sendResponse({ success: true }); + } catch (error) { + console.error('Error handling URL visit:', error); + sendResponse({ success: false }); + } + return true; + } + + // Configuration change notification + if (message.type === 'captureConfigChanged') { + try { + const { config } = message; + + // Update enabled state + captureEnabled = !!config.enableScrollCapture; + + // Notify handlers + Object.values(siteHandlers).forEach(handler => { + if (typeof handler.onConfigChanged === 'function') { + handler.onConfigChanged(config); + } + }); + + sendResponse({ success: true }); + } catch (error) { + console.error('Error handling config change:', error); + sendResponse({ success: false }); + } + return true; + } + + return true; // Indicate async response +}); + +/** + * Handle click on extension icon + */ chrome.action.onClicked.addListener(async (tab) => { + debugLog('Extension icon clicked on tab:', tab.url); + + // Don't try to execute script on chrome:// URLs + if (tab.url.startsWith('chrome://')) { + debugLog('Cannot execute on chrome:// URL, skipping'); + return; + } + const entry = { id: crypto.randomUUID(), url: tab.url, @@ -20,30 +260,36 @@ chrome.action.onClicked.addListener(async (tab) => { favicon: tab.favIconUrl }; + debugLog('Created entry from tab click:', entry); + // Save the entry first const { entries = [] } = await chrome.storage.local.get('entries'); entries.push(entry); await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); - // Inject scripts - CSS now handled in popup.js + // Inject popup script + debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] + }).catch(err => { + console.error('Error injecting script:', err); }); }); -chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { - if (message.type === 
'archivebox_add') { - addToArchiveBox(message.body, sendResponse, sendResponse); +/** + * Handle context menu click + */ +chrome.contextMenus.onClicked.addListener(async function(item, tab) { + debugLog('Context menu save clicked for tab:', tab.url); + + // Don't try to execute script on chrome:// URLs + if (tab.url.startsWith('chrome://')) { + debugLog('Cannot execute on chrome:// URL, skipping'); + return; } - return true; -}); - - -chrome.contextMenus.onClicked.addListener(onClickContextMenuSave); - -// A generic onclick callback function. -async function onClickContextMenuSave(item, tab) { + const entry = { id: crypto.randomUUID(), url: tab.url, @@ -53,20 +299,328 @@ async function onClickContextMenuSave(item, tab) { favicon: tab.favIconUrl }; + debugLog('Created entry from context menu:', entry); + // Save the entry first const { entries = [] } = await chrome.storage.local.get('entries'); entries.push(entry); await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); - // Inject scripts - CSS now handled in popup.js + // Inject popup script + debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] + }).catch(err => { + console.error('Error injecting script:', err); }); -} +}); + +/** + * Handle extension installation and updates + */ chrome.runtime.onInstalled.addListener(function () { + debugLog('Extension installed or updated'); + + // Create context menu chrome.contextMenus.create({ id: 'save_to_archivebox_ctxmenu', title: 'Save to ArchiveBox', }); + + // Set up configuration defaults + initializeConfiguration(); + + // Initialize the extension + initialize(); }); + +/** + * Set up configuration defaults if needed + */ +async function initializeConfiguration() { + const config = await chrome.storage.local.get([ + 'archivebox_server_url', + 'archivebox_api_key', + 'enableScrollCapture', + 'scrollCaptureTags' + ]); + + const updates = {}; + + // Set 
default values if undefined + if (config.archivebox_server_url === undefined) { + updates.archivebox_server_url = ''; + } + + if (config.archivebox_api_key === undefined) { + updates.archivebox_api_key = ''; + } + + if (config.enableScrollCapture === undefined) { + updates.enableScrollCapture = false; + } + + if (config.scrollCaptureTags === undefined) { + updates.scrollCaptureTags = ''; + } + + // Save defaults if needed + if (Object.keys(updates).length > 0) { + await chrome.storage.local.set(updates); + debugLog('Set default config values:', updates); + } +} + +/** + * Handle new tab creation + */ +chrome.tabs.onCreated.addListener((tab) => { + // We'll check if it's a supported site tab once the navigation completes + debugLog('New tab created:', tab.id); +}); + +/** + * Handle tab navigation to detect supported sites + */ +chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => { + // Only react when the tab has completed loading and we have a URL + if (changeInfo.status === 'complete' && tab.url) { + // Check if content capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + captureEnabled = !!enableScrollCapture; + + if (captureEnabled) { + debugLog('Tab updated, checking for supported sites:', tab.url); + + // Check URL against each site handler + Object.entries(siteHandlers).forEach(([site, handler]) => { + if (handler.shouldCaptureUrl && handler.shouldCaptureUrl(tab.url)) { + debugLog(`Detected ${site} site in tab:`, tab.url); + if (handler.injectContentScript) { + handler.injectContentScript(tabId); + } + } + }); + } + } +}); + +/** + * Generic entry saving logic for any URL + */ +async function saveEntry(entry) { + try { + if (!entry || !entry.url) { + debugLog('Invalid entry, not saving', entry); + return { success: false, reason: 'Invalid entry' }; + } + + debugLog('Saving entry:', entry.url); + + // Get current entries + const { entries = [] } = await 
chrome.storage.local.get('entries'); + + // Check for duplicates + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + debugLog('URL normalization error:', e); + return url; + } + }; + + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (existingEntry) { + debugLog('URL already exists in entries, skipping:', entry.url); + return { success: false, reason: 'URL already exists' }; + } + + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? + scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Extract site tags + const siteTags = getSiteTags(entry.url); + + // Create the full entry object + const fullEntry = { + id: entry.id || crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])], + title: entry.title || 'Captured content', + notes: entry.notes || `Auto-captured content: ${entry.url}`, + favicon: entry.favicon + }; + + // Add to entries + entries.push(fullEntry); + + // Limit entries if exceeding maximum + if (entries.length > CONFIG.MAX_ENTRIES) { + // Sort by timestamp (oldest first) and remove excess + entries.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp)); + const removed = entries.splice(0, entries.length - CONFIG.MAX_ENTRIES); + debugLog(`Removed ${removed.length} oldest entries to stay under limit`); + } + + // Save entries + await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); + + return { success: true }; + } catch (e) { + debugLog('Error saving entry:', e); + return { success: false, reason: e.message }; + } +} + +/** + * Extract site name for tagging + */ +function 
getSiteTags(url) { + try { + const hostname = new URL(url).hostname; + const domain = hostname + // Strip only a leading "www." label; a plain-string pattern removes the first + // "www." found anywhere, mangling hosts such as "awww.example.com" + .replace(/^www\./, '') + .replace(/\.(com|org|net|io|gov|edu)$/, ''); + return [domain]; + } catch (e) { + debugLog('Error extracting site tags:', e); + return []; + } +} + +/** + * Show status message in tab + */ +async function showStatusInTab(tabId, message, count, immediate = false) { + try { + // Check if tab still exists before proceeding + try { + const tab = await chrome.tabs.get(tabId); + if (!tab) { + debugLog(`Tab ${tabId} no longer exists, skipping status update`); + return; + } + } catch (e) { + debugLog(`Tab ${tabId} error or no longer exists:`, e.message); + return; + } + + // Setup status indicator if not already present + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + function: setupStatusIndicator, + }).catch(err => { + debugLog(`Error setting up status indicator in tab ${tabId}:`, err.message); + return; + }); + + // Show the status message + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + args: [message, count || 0, immediate], + function: (message, count, immediate) => { + // Add to status queue + if (!window.archiveBoxStatusQueue) window.archiveBoxStatusQueue = []; + window.archiveBoxStatusQueue.unshift(message); + + // Keep only 5 items + if (window.archiveBoxStatusQueue.length > 5) { + window.archiveBoxStatusQueue = window.archiveBoxStatusQueue.slice(0, 5); + } + + // Show status + const indicator = document.getElementById('archiveBoxStatusIndicator'); + const messageContainer = document.getElementById('archiveBoxStatusMessages'); + const countIndicator = document.getElementById('archiveBoxStatusCount'); + + if (indicator && messageContainer && countIndicator) { + // Update message list + messageContainer.innerHTML = window.archiveBoxStatusQueue.map(msg => + `
• ${msg}
` + ).join(''); + + // Update count + countIndicator.textContent = `Captured ${count} posts`; + + // Show indicator + indicator.style.opacity = '1'; + + // Auto hide + clearTimeout(window.archiveBoxStatusTimeout); + window.archiveBoxStatusTimeout = setTimeout(() => { + indicator.style.opacity = '0'; + }, 3000); + } + } + }).catch(err => { + debugLog(`Error showing status in tab ${tabId}:`, err.message); + }); + } catch (err) { + debugLog('Error showing status:', err); + } +} + +/** + * Setup status indicator in tab + */ +function setupStatusIndicator() { + if (!document.getElementById('archiveBoxStatusIndicator')) { + const indicator = document.createElement('div'); + indicator.id = 'archiveBoxStatusIndicator'; + indicator.style.cssText = ` + position: fixed; + bottom: 10px; + right: 10px; + background: rgba(0, 0, 0, 0.7); + color: white; + padding: 10px; + border-radius: 5px; + font-size: 12px; + z-index: 9999; + transition: opacity 0.5s; + opacity: 0; + max-width: 300px; + max-height: 200px; + overflow-y: auto; + line-height: 1.3; + `; + document.body.appendChild(indicator); + + // Create a container for the message list + const messageContainer = document.createElement('div'); + messageContainer.id = 'archiveBoxStatusMessages'; + indicator.appendChild(messageContainer); + + // Create a count indicator + const countIndicator = document.createElement('div'); + countIndicator.id = 'archiveBoxStatusCount'; + countIndicator.style.cssText = ` + margin-top: 5px; + font-weight: bold; + text-align: center; + border-top: 1px solid rgba(255, 255, 255, 0.3); + padding-top: 5px; + `; + indicator.appendChild(countIndicator); + + // Initialize status queue + window.archiveBoxStatusQueue = []; + } +} + +// Initialize on startup +chrome.runtime.onStartup.addListener(() => { + debugLog('Extension started'); + initialize(); +}); + diff --git a/config-tab.js b/config-tab.js index 42a897c..7635215 100755 --- a/config-tab.js +++ b/config-tab.js @@ -1,5 +1,6 @@ // Config tab 
initialization and handlers import { updateStatusIndicator, syncToArchiveBox, getArchiveBoxServerUrl } from './utils.js'; +import { getAllHandlers, getAllStats } from './site-handlers.js'; export async function initializeConfigTab() { const configForm = document.getElementById('configForm'); @@ -15,10 +16,9 @@ export async function initializeConfigTab() { 'match_urls', 'exclude_urls', ]); - console.log('Got config values from storage:', archivebox_server_url, archivebox_api_key, match_urls, exclude_urls); // migrate old config_archiveboxBaseUrl to archivebox_server_url - const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveboxBaseUrl', ); + const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveBoxBaseUrl', ); if (config_archiveBoxBaseUrl) { await chrome.storage.local.set({ archivebox_server_url: config_archiveBoxBaseUrl }); } @@ -209,7 +209,7 @@ export async function initializeConfigTab() { }; const result = await syncToArchiveBox(testEntry); - document.getElementById('inprogress-test').remove(); + document.getElementById('inprogress-test')?.remove(); if (result.ok) { testStatus.innerHTML += ` @@ -241,6 +241,278 @@ export async function initializeConfigTab() { testButton.click(); } }); + + // Initialize site-specific capture settings + await initializeSiteCapture(); +} + +/** + * Initialize site-specific capture settings + */ +async function initializeSiteCapture() { + // Load scroll capture settings + const enableScrollCapture = document.getElementById('enableScrollCapture'); + const scrollCaptureTags = document.getElementById('scrollCaptureTags'); + + const { + enableScrollCapture: savedEnableScrollCapture, + scrollCaptureTags: savedScrollCaptureTags, + redditCaptureConfig + } = await chrome.storage.local.get([ + 'enableScrollCapture', + 'scrollCaptureTags', + 'redditCaptureConfig' + ]); + + enableScrollCapture.checked = !!savedEnableScrollCapture; + scrollCaptureTags.value = savedScrollCaptureTags || ''; + + 
// Add event handlers for scroll capture settings + enableScrollCapture.addEventListener('change', async () => { + await chrome.storage.local.set({ enableScrollCapture: enableScrollCapture.checked }); + + // Notify all tabs of the change + const tabs = await chrome.tabs.query({}); + for (const tab of tabs) { + try { + chrome.tabs.sendMessage(tab.id, { + type: 'captureStatusChanged', + enabled: enableScrollCapture.checked + }).catch(() => {/* Ignore errors for tabs that don't have the content script */}); + } catch (e) { + // Ignore errors for tabs that don't have the content script + } + } + }); + + scrollCaptureTags.addEventListener('change', async () => { + await chrome.storage.local.set({ scrollCaptureTags: scrollCaptureTags.value }); + }); + + // Initialize Reddit-specific settings + await initializeRedditSettings(redditCaptureConfig); + + // Add site handlers information + populateSiteHandlersInfo(); + + // Add capture stats display + await updateCaptureStats(); + + // Set up stats refresh button + document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats); +} + +/** + * Initialize Reddit-specific settings + */ +async function initializeRedditSettings(savedConfig) { + // Default configuration + const defaultConfig = { + captureSubreddits: true, + capturePostDetails: true, + captureComments: false, + commentsDepth: 2, + excludedSubreddits: [], + includedSubreddits: [], + maxProcessedPosts: 1000 + }; + + // Merge saved config with defaults + const config = { ...defaultConfig, ...(savedConfig || {}) }; + + // Create Reddit-specific settings UI if it doesn't exist + const redditSettingsContainer = document.getElementById('redditSettingsContainer'); + if (!redditSettingsContainer) { + return; // Element doesn't exist, can't add settings + } + + // Build the Reddit settings UI + redditSettingsContainer.innerHTML = ` +
+
+
Reddit Capture Settings
+
+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
Maximum number of post IDs to keep in memory (100-10000)
+
+
+
+ +
+
+ + +
Only posts from these subreddits will be captured
+
+
+ + +
Posts from these subreddits will never be captured
+
+
+ + +
+
+ `; + + // Add event listener for saving settings + document.getElementById('saveRedditSettings').addEventListener('click', async () => { + // Collect the current settings + const newConfig = { + captureSubreddits: document.getElementById('redditCaptureSubreddits').checked, + capturePostDetails: document.getElementById('redditCapturePostDetails').checked, + captureComments: document.getElementById('redditCaptureComments').checked, + commentsDepth: parseInt(document.getElementById('redditCommentsDepth').value, 10), + maxProcessedPosts: parseInt(document.getElementById('redditMaxProcessedPosts').value, 10), + includedSubreddits: document.getElementById('redditIncludedSubreddits').value + .split(',') + .map(s => s.trim().toLowerCase()) + .filter(s => s), + excludedSubreddits: document.getElementById('redditExcludedSubreddits').value + .split(',') + .map(s => s.trim().toLowerCase()) + .filter(s => s) + }; + + // Validate settings + // parseInt() yields NaN for an empty or non-numeric field, and NaN passes both + // range checks below, so fall back to the defaults rather than persisting NaN + if (Number.isNaN(newConfig.commentsDepth)) newConfig.commentsDepth = defaultConfig.commentsDepth; + if (Number.isNaN(newConfig.maxProcessedPosts)) newConfig.maxProcessedPosts = defaultConfig.maxProcessedPosts; + if (newConfig.maxProcessedPosts < 100) newConfig.maxProcessedPosts = 100; + if (newConfig.maxProcessedPosts > 10000) newConfig.maxProcessedPosts = 10000; + + // Save the settings + await chrome.storage.local.set({ redditCaptureConfig: newConfig }); + + // Show success message + alert('Reddit settings saved successfully'); + }); +} + +/** + * Populate site handlers information + */ +function populateSiteHandlersInfo() { + const handlersContainer = document.getElementById('siteHandlersContainer'); + if (!handlersContainer) return; + + const handlers = getAllHandlers(); + + // Create the handlers info UI + handlersContainer.innerHTML = ` +
+
+
Site Handlers
+
+
+ + + + + + + + + + + ${Object.entries(handlers).map(([id, handler]) => ` + + + + + + + `).join('')} + +
HandlerDomainsVersionDescription
${handler.name}${handler.domains.join(', ')}${handler.version}${handler.description}
+
+
+ `; +} + +/** + * Update capture stats + */ +async function updateCaptureStats() { + const statsContainer = document.getElementById('captureStatsContainer'); + if (!statsContainer) return; + + // Get stats from all handlers + const stats = await new Promise(resolve => { + chrome.runtime.sendMessage({ type: 'getStats' }, response => { + resolve(response?.stats || {}); + }); + }); + + // Create the stats UI + statsContainer.innerHTML = ` +
+
+
Capture Statistics
+ +
+
+
+ ${Object.entries(stats).map(([site, siteStats]) => ` +
+
+
+
${site.charAt(0).toUpperCase() + site.slice(1)} Stats
+
+
+
    + ${Object.entries(siteStats).map(([key, value]) => ` +
  • + ${key.replace(/([A-Z])/g, ' $1').replace(/^./, str => str.toUpperCase())} + ${value} +
  • + `).join('')} +
+
+
+
+ `).join('')} +
+
+
+ `; + + // Re-attach the refresh button event listener + document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats); } // Using shared syncToArchiveBox function from utils.js diff --git a/entries-tab.js b/entries-tab.js index 9481646..af02568 100755 --- a/entries-tab.js +++ b/entries-tab.js @@ -1,4 +1,38 @@ import { filterEntries, addToArchiveBox, downloadCsv, downloadJson, syncToArchiveBox, updateStatusIndicator, getArchiveBoxServerUrl } from './utils.js'; +import { getAllHandlers, shouldCaptureUrl } from './site-handlers.js'; + +/** + * Get site handler information for an entry + * @param {Object} entry - The entry to get handler info for + * @return {Object|null} Handler info if found + */ +async function getSiteHandlerForEntry(entry) { + if (!entry || !entry.url) return null; + + try { + // Send message to background script + return new Promise(resolve => { + chrome.runtime.sendMessage( + { type: 'getSiteHandlerForUrl', url: entry.url }, + response => resolve(response?.handler || null) + ); + }); + } catch (error) { + console.error('Error getting site handler for entry:', error); + return null; + } +} + +function getSiteHandlerIcon(handlerId) { + const icons = { + reddit: '💬', + twitter: '🐦', + youtube: '▶️', + default: '🌐' + }; + + return icons[handlerId] || icons.default; +} export async function renderEntries(filterText = '', tagFilter = '') { const { entries = [] } = await chrome.storage.local.get('entries'); @@ -16,23 +50,85 @@ export async function renderEntries(filterText = '', tagFilter = '') { // Display filtered entries const entriesList = document.getElementById('entriesList'); - entriesList.innerHTML = filteredEntries.map(entry => ` -
-
- - ${new Date(entry.timestamp).toISOString().replace('T', ' ').split('.')[0]} - -
- ${entry.url} -
-
- ${entry.tags.length ? ` -

- ${entry.tags.map(tag => - `${tag}` - ).join('')} -

- ` : ''} + // Add a custom style for site handler badges if not already present + if (!document.getElementById('siteHandlerStyles')) { + const style = document.createElement('style'); + style.id = 'siteHandlerStyles'; + style.textContent = ` + .site-handler-badge { + display: inline-flex; + align-items: center; + padding: 2px 6px; + font-size: 0.7rem; + background-color: #e3f2fd; + color: #0d6efd; + border-radius: 4px; + margin-right: 8px; + } + + .site-handler-icon { + margin-right: 2px; + } + `; + document.head.appendChild(style); + } + + // Get site handler info for each entry + const entryHandlers = await Promise.all( + filteredEntries.map(async entry => { + return { + entry, + handler: await getSiteHandlerForEntry(entry) + }; + }) + ); + + entriesList.innerHTML = entryHandlers.map(({ entry, handler }) => ` +
+ +
+
+
+ ${handler ? + ` + ${getSiteHandlerIcon(handler.id)} + ${handler.name} + ` : '' + } + ${entry.title || 'Untitled'} +
+ ${(()=>{ + return archivebox_server_url ? + `
+ + 🔗 Original + + + 📦 ArchiveBox + + + 🏛️ Archive.org + +
` + : '' })() + } +
+
+ + ${entry.url} + +
+
+ ${entry.tags.map(tag => + `${tag}` + ).join('')}
@@ -330,9 +426,58 @@ export function initializeEntriesTab() { window.history.pushState({}, '', newUrl); } + /** + * Render the tags list sidebar with frequency counts and site filters + * @param {Array} filteredEntries - The currently filtered entries + */ async function renderTagsList(filteredEntries) { const tagsList = document.getElementById('tagsList'); + // Add site handler filters + const handlers = getAllHandlers(); + + // Check if we have entries from supported sites + const siteCount = {}; + + filteredEntries.forEach(entry => { + Object.entries(handlers).forEach(([id, handler]) => { + if (handler.domains.some(domain => entry.url.includes(domain))) { + siteCount[id] = (siteCount[id] || 0) + 1; + } + }); + }); + + // Start with site filters if we have entries from supported sites + let tagsListHTML = ''; + + if (Object.keys(siteCount).length > 0) { + tagsListHTML += '
Sites
'; + + // Get current filter to highlight active site if any + const currentFilter = document.getElementById('filterInput').value.toLowerCase(); + + // Add site filters sorted by count + tagsListHTML += Object.entries(siteCount) + .sort(([, countA], [, countB]) => countB - countA) + .map(([siteId, count]) => { + const handler = handlers[siteId]; + const isActive = currentFilter === `site:${siteId}`; + + return ` + + + ${getSiteHandlerIcon(siteId)} ${handler.name} + + ${count} + + `; + }).join(''); + + tagsListHTML += '
Tags
'; + } + // Count occurrences of each tag in filtered entries only const tagCounts = filteredEntries.reduce((acc, entry) => { entry.tags.forEach(tag => { @@ -340,19 +485,19 @@ export function initializeEntriesTab() { }); return acc; }, {}); - + // Sort tags by frequency (descending) then alphabetically const sortedTags = Object.entries(tagCounts) .sort(([tagA, countA], [tagB, countB]) => { if (countB !== countA) return countB - countA; return tagA.localeCompare(tagB); }); - + // Get current filter to highlight active tag if any const currentFilter = document.getElementById('filterInput').value.toLowerCase(); - - // Render tags list with counts - tagsList.innerHTML = sortedTags.map(([tag, count]) => ` + + // Add tags with counts + tagsListHTML += sortedTags.map(([tag, count]) => ` @@ -360,7 +505,10 @@ export function initializeEntriesTab() { ${count} `).join(''); - + + // Set the HTML + tagsList.innerHTML = tagsListHTML; + // Add click handlers for tag filtering tagsList.querySelectorAll('.tag-filter').forEach(tagElement => { tagElement.addEventListener('click', (e) => { @@ -378,6 +526,24 @@ export function initializeEntriesTab() { renderEntries(); }); }); + + // Add click handlers for site filtering + tagsList.querySelectorAll('.site-filter').forEach(siteElement => { + siteElement.addEventListener('click', (e) => { + e.preventDefault(); + const site = siteElement.dataset.site; + const filterInput = document.getElementById('filterInput'); + + // Toggle site filter + if (filterInput.value.toLowerCase() === `site:${site}`) { + filterInput.value = ''; // Clear filter if clicking active site + } else { + filterInput.value = `site:${site}`; + } + + renderEntries(); + }); + }); } // Modify existing renderEntries function diff --git a/manifest.json b/manifest.json index 4f32a09..3082cb8 100755 --- a/manifest.json +++ b/manifest.json @@ -8,15 +8,24 @@ "scripting", "activeTab", "contextMenus", - "unlimitedStorage" + "unlimitedStorage", + "webRequest", + "tabs", + 
"webNavigation" ], "optional_permissions": [ "cookies", "history", "bookmarks" ], - "optional_host_permissions": [ - "*://*\/*" + "host_permissions": [ + "<all_urls>" + ], + "content_scripts": [ + { + "matches": ["*://*.reddit.com/*"], + "js": ["reddit-content.js"] + } ], "icons": { "16": "16.png", @@ -39,8 +48,8 @@ "type": "module" }, "web_accessible_resources": [{ - "resources": ["popup.css", "popup.js"], - "matches": ["*://*\/*"] + "resources": ["popup.css", "popup.js", "site-handlers.js", "reddit-handler.js"], + "matches": ["*://*/*"] }], "commands": { "save-to-archivebox-action": { diff --git a/options.html b/options.html index 73f27e6..03bca06 100755 --- a/options.html +++ b/options.html @@ -299,6 +299,35 @@
Advanced Users Only: Auto-archive URLs

+ +
+
Content Capture Settings
+
+ + +
+
+ When enabled, the extension will automatically detect and save content from supported sites as you browse them. +
+ +
+ + +
+ + +
+ + +
+ + +
+
+
diff --git a/options.js b/options.js index 7c9c4e2..f01e9f3 100755 --- a/options.js +++ b/options.js @@ -3,15 +3,20 @@ import { initializeImport } from './import-tab.js'; import { initializePersonasTab } from './personas-tab.js'; import { initializeCookiesTab } from './cookies-tab.js'; import { initializeConfigTab } from './config-tab.js'; +import { initializeAll as initializeAllSiteHandlers } from './site-handlers.js'; // Initialize all tabs when options page loads -document.addEventListener('DOMContentLoaded', () => { +document.addEventListener('DOMContentLoaded', async () => { + // Initialize all tabs initializeEntriesTab(); initializeImport(); initializePersonasTab(); initializeCookiesTab(); initializeConfigTab(); + // Initialize site handlers + await initializeAllSiteHandlers(); + function changeTab() { if (window.location.hash && window.location.hash !== document.querySelector('a.nav-link.active').id) { console.log('Changing tab based on URL hash:', window.location.hash, `a.nav-link${window.location.hash}`, document.querySelector(`a.nav-link${window.location.hash}`)); diff --git a/popup.js b/popup.js index cb5b74f..245e1ee 100755 --- a/popup.js +++ b/popup.js @@ -3,6 +3,8 @@ const IS_IN_POPUP = window.location.href.startsWith('chrome-extension://') && window.location.href.endsWith('/popup.html'); const IS_ON_WEBSITE = !window.location.href.startsWith('chrome-extension://'); +window.handler_stats = null; // Global stats reference + window.popup_element = null; // Global reference to popup element window.hide_timer = null; @@ -65,6 +67,33 @@ async function sendToArchiveBox(url, tags) { return { ok: ok, status: status}; } +async function getSiteHandlerInfo(url) { + try { + if (!url) return null; + + const response = await chrome.runtime.sendMessage({ + type: 'getSiteHandlerForUrl', + url + }); + + return response?.handler || null; + } catch (error) { + console.log('Failed to get site handler info:', error); + return null; + } +} + +async function 
getHandlerStats() { + try { + const response = await chrome.runtime.sendMessage({ type: 'getStats' }); + window.handler_stats = response?.stats || {}; + return window.handler_stats; + } catch (error) { + console.log('Failed to get handler stats:', error); + return {}; + } +} + window.getCurrentEntry = async function() { const { entries = [] } = await chrome.storage.local.get('entries'); let current_entry = entries.find(entry => entry.url === window.location.href); @@ -411,6 +440,7 @@ window.createPopup = async function() { 🏛️

+
Saved locally... @@ -603,6 +633,56 @@ window.createPopup = async function() { selectedIndex = -1; } }); + // Check if this URL has a specific handler and show info + const siteHandlerInfo = await getSiteHandlerInfo(current_entry.url); + const statsContainer = popup.querySelector('.site-handler-info'); + + if (siteHandlerInfo) { + // Update the style for the handler info + const style = doc.createElement('style'); + style.textContent += ` + .site-handler-info { + font-size: 12px; + margin-bottom: 8px; + color: #f0f0f0; + } + + .handler-stats { + display: flex; + gap: 8px; + margin-top: 4px; + } + + .stat-item { + background: rgba(255, 255, 255, 0.1); + padding: 2px 6px; + border-radius: 4px; + font-size: 10px; + } + `; + doc.head.appendChild(style); + + // Show handler info + statsContainer.innerHTML = ` +
This page uses the ${siteHandlerInfo.name} handler for enhanced capture.
+
+ ${siteHandlerInfo.id === 'reddit' ? 'Reddit-specific options available in settings' : ''} +
+ `; + + // Get stats if available + const stats = await getHandlerStats(); + const handlerStats = stats[siteHandlerInfo.id]; + + if (handlerStats) { + const statsRow = statsContainer.querySelector('.handler-stats'); + if (handlerStats.captureCount) { + statsRow.innerHTML += `Captured: ${handlerStats.captureCount}`; + } + } + } else { + statsContainer.style.display = 'none'; + } input.focus(); console.log('+ Showed ArchiveBox popup in iframe'); @@ -657,6 +737,22 @@ window.createPopup = async function() { // Initial resize setTimeout(resizeIframe, 0); + + notifyUrlVisit(current_entry.url); } window.createPopup(); + +// Function to notify background script about URL visit +async function notifyUrlVisit(url) { + if (!url) return; + + try { + await chrome.runtime.sendMessage({ + type: 'urlVisited', + url + }); + } catch (error) { + // Ignore any errors + } +} diff --git a/reddit-content.js b/reddit-content.js new file mode 100644 index 0000000..d4283fd --- /dev/null +++ b/reddit-content.js @@ -0,0 +1,365 @@ +// reddit-content.js +// Content script for detecting Reddit posts in the viewport with improved architecture + +// Configuration +const CONFIG = { + OBSERVATION_THRESHOLD: 0.4, // Post must be 40% visible to trigger capture + ROOT_MARGIN: "100px", // Extend detection area beyond viewport + QUEUE_PROCESS_DELAY: 100, // Delay between processing items in queue + MUTATION_OBSERVER_DELAY: 150, // Delay after DOM changes before finding new posts + MAX_PROCESSED_POSTS: 1000, // Maximum number of processed post IDs to store + DEBUG_MODE: true // Enable debug logging +}; + +// State management +const state = { + observedPosts: new Set(), // Posts we've already seen and processed + postQueue: [], // Queue of posts to process in positional order + isProcessingQueue: false, // Whether we're currently processing the queue + captureCount: 0, // Number of posts captured in this session + isEnabled: false, // Whether capture is enabled + isInitialized: false // Whether we've 
initialized the system +}; + +/** + * Debug logging + */ +function debugLog(...args) { + if (CONFIG.DEBUG_MODE) { + console.log('[ArchiveBox Reddit]', ...args); + } +} + +/** + * Process posts in order from top to bottom of page + */ +function processNextPost() { + if (state.postQueue.length === 0) { + state.isProcessingQueue = false; + return; + } + + state.isProcessingQueue = true; + + // Sort post queue by Y position (top to bottom) + state.postQueue.sort((a, b) => a.position - b.position); + + // Process the topmost post + const postToProcess = state.postQueue.shift(); + capturePost(postToProcess.postElement, postToProcess.postId); + + // Continue processing the queue with a small delay to prevent UI blocking + setTimeout(processNextPost, CONFIG.QUEUE_PROCESS_DELAY); +} + +/** + * Queue a post for capture based on its position in the viewport + */ +function queuePostForCapture(postElement, postId) { + // Get the vertical position of the post + const rect = postElement.getBoundingClientRect(); + const position = rect.top; + + // Add to queue with position data + state.postQueue.push({ + postElement, + postId, + position + }); + + // Start processing queue if not already running + if (!state.isProcessingQueue) { + processNextPost(); + } +} + +/** + * Extract useful information from a post element + */ +function extractPostData(postElement, postId) { + // Extract post details - try different selectors to handle Reddit's different UI versions + const titleElement = postElement.querySelector( + 'h1, h3, [data-testid="post-title"], [data-click-id="body"] h2, a.title' + ); + + const linkElement = postElement.querySelector( + 'a.title, [data-click-id="body"], a[data-click-id="comments"], [data-testid="post-title"] a' + ); + + if (!titleElement) { + debugLog('Could not find title element in post:', postId); + return null; + } + + // Get title + const title = titleElement.textContent.trim(); + + // Get permalink/URL + let url = ''; + if (linkElement && linkElement.href) 
{ + url = linkElement.href; + } else { + // Try to construct URL from post ID if it matches Reddit's post ID format + const redditId = postId.replace('t3_', ''); + if (redditId.length >= 6) { + // Try to extract subreddit + const subredditElement = postElement.querySelector('a[href^="/r/"]'); + const subredditName = subredditElement ? subredditElement.textContent.replace('r/', '') : ''; + + if (subredditName) { + url = `https://www.reddit.com/r/${subredditName}/comments/${redditId}/`; + } else { + url = `https://www.reddit.com/comments/${redditId}/`; + } + } + } + + if (!title || !url) { + debugLog('Insufficient data for post, skipping'); + return null; + } + + // Get subreddit + const subredditElement = postElement.querySelector('a[href^="/r/"]'); + const subreddit = subredditElement ? subredditElement.textContent.replace('r/', '') : ''; + + return { + url, + title, + subreddit + }; +} + +/** + * Capture post data and send to background script + */ +function capturePost(postElement, postId) { + // Only capture the post if we haven't already processed it + if (state.observedPosts.has(postId)) return; + + // Mark as processed and manage the max size of observedPosts + state.observedPosts.add(postId); + if (state.observedPosts.size > CONFIG.MAX_PROCESSED_POSTS) { + // Remove oldest entries (approximation since Sets don't guarantee order) + const excess = state.observedPosts.size - CONFIG.MAX_PROCESSED_POSTS; + const entries = Array.from(state.observedPosts).slice(0, excess); + entries.forEach(entry => state.observedPosts.delete(entry)); + debugLog(`Pruned ${excess} old post IDs from observed set`); + } + + // Extract post data + const postData = extractPostData(postElement, postId); + if (!postData) return; + + // Increment capture count + state.captureCount++; + + // Send to background script with high priority + chrome.runtime.sendMessage({ + type: 'capture', + entry: { + url: postData.url, + title: postData.title, + tags: ['reddit', postData.subreddit, 
'viewport-captured'].filter(Boolean), + timestamp: new Date().toISOString(), + priority: 'high' // Mark as high priority + } + }); + + // Add visual indicator to the post + addVisualIndicator(postElement); + + // Show status immediately + chrome.runtime.sendMessage({ + type: 'showStatus', + message: `Captured: ${postData.title.substring(0, 40)}...`, + count: state.captureCount, + immediate: true // Request immediate display + }); + + debugLog('Captured post in viewport:', postData.title, postData.url); +} + +/** + * Add a small visual indicator to show the post has been captured + */ +function addVisualIndicator(postElement) { + // Create indicator if it doesn't exist + if (!postElement.querySelector('.archivebox-captured-indicator')) { + const indicator = document.createElement('div'); + indicator.className = 'archivebox-captured-indicator'; + indicator.style.cssText = ` + position: absolute; + top: 0; + right: 0; + background: rgba(0, 128, 0, 0.6); + color: white; + font-size: 10px; + padding: 2px 5px; + border-radius: 0 0 0 3px; + z-index: 9999; + `; + indicator.textContent = '✓ Archived'; + + // Make sure the post has a relative position for absolute positioning to work + if (getComputedStyle(postElement).position === 'static') { + postElement.style.position = 'relative'; + } + + postElement.appendChild(indicator); + } +} + +/** + * Set up intersection observer to detect posts as they become visible + */ +function setupObserver() { + debugLog('Setting up viewport observer for Reddit'); + + const observer = new IntersectionObserver((entries) => { + entries.forEach(entry => { + if (entry.isIntersecting && entry.intersectionRatio >= CONFIG.OBSERVATION_THRESHOLD) { + const postElement = entry.target; + + // Extract post ID to avoid processing the same post multiple times + const postId = postElement.id || + postElement.getAttribute('data-post-id') || + postElement.getAttribute('data-fullname') || + postElement.getAttribute('id'); + + if (!postId) return; + + // 
Queue for processing in top-to-bottom order + queuePostForCapture(postElement, postId); + } + }); + }, { + threshold: CONFIG.OBSERVATION_THRESHOLD, + rootMargin: CONFIG.ROOT_MARGIN + }); + + // Find and observe posts + function findAndObservePosts() { + // Attempt to find posts using different selectors for different Reddit versions + const postSelectors = [ + // Current "new" Reddit redesign + 'div[data-testid="post-container"]', + '.Post', + '[data-test-id="post-content"]', + + // Old Reddit design + '.thing[data-author]', + + // Mobile Reddit + 'article[data-testid="post"]', + + // Generic fallbacks that might work across versions + '[data-click-id="body"]', + '.scrollerItem' + ]; + + const postElements = document.querySelectorAll(postSelectors.join(', ')); + + if (postElements.length > 0) { + debugLog(`Found ${postElements.length} Reddit posts to observe`); + postElements.forEach(post => observer.observe(post)); + } + } + + // Initial find + findAndObservePosts(); + + // Set up mutation observer to detect new posts loaded during scrolling + const mutationObserver = new MutationObserver((mutations) => { + let shouldFindPosts = false; + + for (const mutation of mutations) { + if (mutation.addedNodes.length > 0) { + shouldFindPosts = true; + break; + } + } + + if (shouldFindPosts) { + // Wait a small amount of time for any post loading to complete + // This helps reduce redundant processing during rapid DOM changes + clearTimeout(state.mutationTimeout); + state.mutationTimeout = setTimeout(findAndObservePosts, CONFIG.MUTATION_OBSERVER_DELAY); + } + }); + + // Observe changes to the body and any feed containers + const feedContainers = [ + document.body, + ...document.querySelectorAll('.ListingLayout-outerContainer, .browse-container, #siteTable') + ]; + + feedContainers.forEach(container => { + if (container) { + mutationObserver.observe(container, { childList: true, subtree: true }); + } + }); + + return { + disconnect: () => { + observer.disconnect(); + 
mutationObserver.disconnect(); + debugLog('Observers disconnected'); + } + }; +} + +/** + * Initialize the content script + */ +function initialize() { + if (state.isInitialized) return; + + // Only run on Reddit domains + if (!window.location.hostname.includes('reddit.com')) { + return; + } + + debugLog('Reddit page detected, checking if capture is enabled'); + + // Check if capture is enabled in the extension settings + chrome.runtime.sendMessage({ type: 'getEnableStatus' }, function(response) { + if (response && response.enableScrollCapture) { + debugLog('Reddit capture enabled, setting up viewport detection'); + state.isEnabled = true; + state.observers = setupObserver(); + } else { + debugLog('Reddit capture is disabled in settings'); + state.isEnabled = false; + } + + state.isInitialized = true; + }); + + // Listen for status changes + chrome.runtime.onMessage.addListener((message) => { + if (message.type === 'captureStatusChanged') { + if (message.enabled && !state.isEnabled) { + // Capture was enabled + debugLog('Capture was enabled, setting up observers'); + state.isEnabled = true; + state.observers = setupObserver(); + } else if (!message.enabled && state.isEnabled) { + // Capture was disabled + debugLog('Capture was disabled, shutting down observers'); + state.isEnabled = false; + if (state.observers) { + state.observers.disconnect(); + state.observers = null; + } + } + } + }); +} + +// Handle initialization properly +if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', initialize); +} else { + initialize(); +} diff --git a/reddit-handler.js b/reddit-handler.js new file mode 100644 index 0000000..620b8c5 --- /dev/null +++ b/reddit-handler.js @@ -0,0 +1,593 @@ +// reddit-handler.js +// Manages all Reddit-specific capture functionality + +// Configuration +const CONFIG = { + CAPTURE_DELAY: 1000, // Delay between captures in ms + VIEWPORT_CAPTURE_DELAY: 100, // Quicker for visible posts + MAX_PROCESSED_URLS: 1000, // 
Maximum number of URLs to keep in memory + DEBUG_MODE: true, + BATCH_SIZE: 10, // Number of entries to batch save + STORAGE_KEY: 'reddit_processed_urls' // Key for storing processed URLs +}; + +// State management +let processedUrls = new Set(); +let captureCount = 0; +let isInitialized = false; + +// Queues with priority +const captureQueue = { + high: [], // Viewport-visible posts + normal: [], // Background discovered posts + processing: false +}; + +/** + * Debug logging + */ +function debugLog(...args) { + if (CONFIG.DEBUG_MODE) { + console.log('[Reddit Handler]', ...args); + } +} + +/** + * Initialize the Reddit handler + */ +export async function initialize() { + if (isInitialized) return; + + debugLog('Initializing Reddit handler'); + + // Load previously processed URLs from storage + const storage = await chrome.storage.local.get(CONFIG.STORAGE_KEY); + if (storage[CONFIG.STORAGE_KEY]) { + try { + const storedUrls = JSON.parse(storage[CONFIG.STORAGE_KEY]); + processedUrls = new Set(storedUrls); + debugLog(`Loaded ${processedUrls.size} previously processed URLs`); + } catch (e) { + debugLog('Error parsing stored URLs:', e); + processedUrls = new Set(); + } + } + + // Reset capture count + captureCount = 0; + + // Setup listeners + setupRedditListeners(); + + isInitialized = true; + debugLog('Reddit handler initialized'); + + // Start queue processor + processQueue(); +} + +/** + * Setup listeners for Reddit-specific functionality + */ +function setupRedditListeners() { + // Listen for navigation to Reddit post pages + chrome.webRequest.onCompleted.addListener( + handleRedditNavigation, + { urls: ["*://*.reddit.com/*"] }, + [] + ); + + // Listen for POST requests that might contain Reddit data + chrome.webRequest.onBeforeRequest.addListener( + handleRedditApiRequest, + { urls: ["*://*.reddit.com/*"] }, + ["requestBody"] + ); +} + +/** + * Handle navigation to a Reddit post + */ +async function handleRedditNavigation(details) { + // Only interested in document 
navigation + if (details.type !== 'main_frame' && details.type !== 'sub_frame') { + return; + } + + // Check if URL contains Reddit and is a post + if (!details.url.includes('reddit.com') || !isRedditPostUrl(details.url)) { + return; + } + + // Get settings to see if we should capture + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + return; + } + + debugLog('Detected navigation to Reddit post:', details.url); + + // Inject content script for viewport detection + injectContentScript(details.tabId); + + // Wait for page to load title + setTimeout(async () => { + try { + // Get tab info + const tab = await chrome.tabs.get(details.tabId).catch(() => null); + if (!tab) return; + + // Process the URL + processRedditNavigationUrl(details.url, tab.title, details.tabId); + } catch (e) { + debugLog('Error processing Reddit navigation:', e); + } + }, 1000); +} + +/** + * Handle Reddit API requests that might contain post data + */ +async function handleRedditApiRequest(details) { + if (details.method !== "POST") return; + + // Check for relevant endpoints + const isRedditAPIEndpoint = + details.url.includes('/svc/shreddit/events') || + details.url.includes('/svc/shreddit/graphql') || + details.url.includes('/api/'); + + if (!isRedditAPIEndpoint) return; + + // Check if capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + return; + } + + try { + // Try to parse the request body if available + if (details.requestBody && details.requestBody.raw) { + for (const raw of details.requestBody.raw) { + if (raw.bytes) { + const decoder = new TextDecoder(); + const text = decoder.decode(raw.bytes); + + // Look for post data patterns + if (text.includes('"post":') && text.includes('"title":')) { + extractPostsFromJson(text, details.tabId); + } else if (text.includes('"subreddit_name":') && text.includes('"title":')) { + 
extractPostsFromJson(text, details.tabId); + } + } + } + } + } catch (e) { + debugLog('Error processing request body:', e); + } +} + +/** + * Extract posts from JSON data + */ +function extractPostsFromJson(jsonText, tabId) { + try { + // For debugging, log a sample of what we're trying to parse + debugLog('Parsing JSON data sample:', jsonText.substring(0, 200)); + + // Try to parse the JSON + let data = null; + try { + data = JSON.parse(jsonText); + } catch (e) { + debugLog('Failed to parse JSON:', e.message); + return; + } + + // Check for Reddit's specific structure with "info" array + if (data && data.info && Array.isArray(data.info)) { + debugLog('Found Reddit info array with', data.info.length, 'items'); + + // Process each item in the info array + data.info.forEach(item => { + // Check if this item has a post object + if (item && item.post) { + // Extract the post data + const post = item.post; + + // Check for title field + if (post.title) { + debugLog('Found post with title:', post.title); + + // Create URL + let url = ''; + if (post.url && post.url.startsWith('/')) { + url = 'https://www.reddit.com' + post.url; + } else if (post.url) { + url = post.url; + } else if (post.id && post.id.startsWith('t3_')) { + // Construct URL from post ID + const postId = post.id.substring(3); + + // Include subreddit if available + if (post.subreddit_name) { + const subreddit = post.subreddit_name.replace('r/', ''); + url = `https://www.reddit.com/r/${subreddit}/comments/${postId}`; + } else { + url = `https://www.reddit.com/comments/${postId}`; + } + } + + if (url) { + // Extract subreddit + let subreddit = ''; + if (post.subreddit_name) { + subreddit = post.subreddit_name.replace('r/', ''); + } + + // Create post data object + const postData = { + url: url, + title: post.title, + subreddit: subreddit, + timestamp: new Date().toISOString() + }; + + // Queue the post for processing with normal priority + queueForCapture(postData, tabId, 'normal'); + } + } + } + }); + } + } 
catch (e) { + debugLog('Error processing JSON data:', e); + } +} + +/** + * Check if URL is a Reddit post + */ +function isRedditPostUrl(url) { + try { + if (!url.includes('reddit.com')) return false; + + const parsedUrl = new URL(url); + return parsedUrl.pathname.includes('/comments/'); + } catch (e) { + return false; + } +} + +/** + * Process a Reddit navigation URL + */ +function processRedditNavigationUrl(url, pageTitle, tabId) { + try { + const parsedUrl = new URL(url); + const pathParts = parsedUrl.pathname.split('/'); + + // Check for /comments/ format + if (pathParts.includes('comments')) { + const commentsIndex = pathParts.indexOf('comments'); + + // Need at least comment ID + if (commentsIndex + 1 < pathParts.length) { + // Get subreddit if present + let subreddit = ''; + if (pathParts[1] === 'r' && pathParts[2]) { + subreddit = pathParts[2]; + } + + // Clean up title + let title = pageTitle || ''; + if (title.includes(' - Reddit')) { + title = title.split(' - Reddit')[0].trim(); + } + + // Create post data + const postData = { + url: url, + title: title || 'Reddit Post', + subreddit: subreddit, + timestamp: new Date().toISOString() + }; + + // Queue for processing with normal priority + queueForCapture(postData, tabId, 'normal'); + } + } + } catch (e) { + debugLog('Error processing Reddit URL:', e); + } +} + +/** + * Queue a post for capture with priority + */ +function queueForCapture(postData, tabId, priority = 'normal') { + if (!postData || !postData.url || !postData.title) { + debugLog('Invalid post data, skipping:', postData); + return; + } + + // Normalize URL to avoid duplicates + const normalizedUrl = normalizeRedditUrl(postData.url); + + // Skip if already processed + if (processedUrls.has(normalizedUrl)) { + debugLog('Skipping already processed URL:', normalizedUrl); + return; + } + + debugLog(`Queueing Reddit post with ${priority} priority:`, postData.title); + + // Add to appropriate queue + captureQueue[priority].push({ + data: postData, + 
tabId: tabId, + queuedAt: Date.now() + }); + + // Start processing if not already running + if (!captureQueue.processing) { + processQueue(); + } +} + +/** + * Process the capture queue + */ +async function processQueue() { + if (captureQueue.high.length === 0 && captureQueue.normal.length === 0) { + captureQueue.processing = false; + debugLog('Queue empty, stopping processor'); + return; + } + + captureQueue.processing = true; + + // Process high priority queue first + let item; + let delay; + + if (captureQueue.high.length > 0) { + item = captureQueue.high.shift(); + delay = CONFIG.VIEWPORT_CAPTURE_DELAY; + } else { + item = captureQueue.normal.shift(); + delay = CONFIG.CAPTURE_DELAY; + } + + // Get age of item in queue + const queueAge = Date.now() - item.queuedAt; + debugLog(`Processing post from queue (age: ${queueAge}ms):`, item.data.title); + + // Normalize URL for deduplication + const normalizedUrl = normalizeRedditUrl(item.data.url); + + // Mark as processed + addToProcessedUrls(normalizedUrl); + captureCount++; + + // Create entry object + const entry = { + url: item.data.url, + title: item.data.title, + timestamp: item.data.timestamp, + tags: ['reddit', item.data.subreddit].filter(Boolean) + }; + + // Process the entry + await saveEntry(entry); + + // Show status in tab - check if tab still exists first + try { + const tab = await chrome.tabs.get(item.tabId); + if (tab) { + chrome.runtime.sendMessage({ + type: 'showStatus', + message: `${entry.title.substring(0, 40)}...`, + count: captureCount, + tabId: item.tabId + }); + } + } catch (err) { + debugLog(`Tab ${item.tabId} doesn't exist anymore, skipping status update`); + } + + // Schedule next item with delay + setTimeout(processQueue, delay); +} + +/** + * Add URL to processed URLs and manage the size limit + */ +function addToProcessedUrls(url) { + processedUrls.add(url); + + // If we've exceeded the limit, remove oldest items + // This is approximate since Sets don't guarantee order + if 
(processedUrls.size > CONFIG.MAX_PROCESSED_URLS) { + const urlsArray = Array.from(processedUrls); + const toRemove = urlsArray.slice(0, urlsArray.length - CONFIG.MAX_PROCESSED_URLS); + toRemove.forEach(u => processedUrls.delete(u)); + debugLog(`Removed ${toRemove.length} old URLs from processed set`); + } + + // Periodically save processed URLs to storage + if (processedUrls.size % 50 === 0) { + persistProcessedUrls(); + } +} + +/** + * Save processed URLs to storage + */ +async function persistProcessedUrls() { + const urlsArray = Array.from(processedUrls); + await chrome.storage.local.set({ + [CONFIG.STORAGE_KEY]: JSON.stringify(urlsArray) + }); + debugLog(`Saved ${urlsArray.length} processed URLs to storage`); +} + +/** + * Normalize Reddit URL to avoid duplicates + */ +function normalizeRedditUrl(url) { + try { + const parsedUrl = new URL(url); + + // Extract essential parts (subreddit & post ID) + const parts = parsedUrl.pathname.split('/'); + const commentsIndex = parts.indexOf('comments'); + + if (commentsIndex > 0 && commentsIndex + 1 < parts.length) { + // Get post ID + const postId = parts[commentsIndex + 1]; + + // Get subreddit if available + let subreddit = ''; + if (parts[1] === 'r' && parts[2]) { + subreddit = parts[2]; + } + + // Create canonical URL + if (subreddit) { + return `${parsedUrl.origin}/r/${subreddit}/comments/${postId}`; + } else { + return `${parsedUrl.origin}/comments/${postId}`; + } + } + + // Fallback to removing query params and fragments + return `${parsedUrl.origin}${parsedUrl.pathname}`; + } catch (e) { + debugLog('Error normalizing URL:', e); + return url; + } +} + +/** + * Save entry to local storage + * Eventually used for batch saving + */ +async function saveEntry(entry) { + try { + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? 
+ scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Create the full entry object + const fullEntry = { + id: crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', 'reddit', ...customTags, ...(entry.tags || [])], + title: entry.title || 'Reddit Post', + notes: `Auto-captured from Reddit: ${entry.url}` + }; + + // Save to storage + const { entries = [] } = await chrome.storage.local.get('entries'); + + // Normalize URLs for more accurate comparison + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + return url; + } + }; + + // Check if this URL already exists in our entries + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (!existingEntry) { + entries.push(fullEntry); + await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage:', fullEntry.title); + } else { + debugLog('URL already exists in entries, skipping:', entry.url); + } + } catch (e) { + debugLog('Error saving entry:', e); + } +} + +/** + * Inject content script for viewport detection + */ +export async function injectContentScript(tabId) { + try { + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + debugLog('Reddit capture is disabled in settings, not injecting content script'); + return; + } + + debugLog('Injecting Reddit content script into tab:', tabId); + + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + files: ['reddit-content.js'] + }); + + debugLog('Content script injected successfully'); + } catch (err) { + debugLog('Error injecting content script:', err.message); + } +} + +/** + * Handle high priority capture request from content script + */ +export function captureHighPriority(entry, tabId) { 
+ debugLog('Received high priority capture request from content script:', entry.url); + + // Create post data object + const postData = { + url: entry.url, + title: entry.title, + subreddit: entry.tags.find(tag => tag !== 'reddit' && tag !== 'viewport-captured'), + timestamp: entry.timestamp + }; + + // Queue with high priority + queueForCapture(postData, tabId, 'high'); +} + +/** + * Clear all queues and reset + */ +export function reset() { + captureQueue.high = []; + captureQueue.normal = []; + captureQueue.processing = false; + captureCount = 0; + debugLog('Reddit handler reset'); +} + +/** + * Public method to check if we should capture the current URL + */ +export function shouldCaptureUrl(url) { + if (!url.includes('reddit.com')) return false; + return isRedditPostUrl(url); +} + +/** + * Get stats about the Reddit handler + */ +export function getStats() { + return { + captureCount, + processedUrlsCount: processedUrls.size, + highPriorityQueueLength: captureQueue.high.length, + normalPriorityQueueLength: captureQueue.normal.length + }; +} diff --git a/site-handlers.js b/site-handlers.js new file mode 100644 index 0000000..4385ebd --- /dev/null +++ b/site-handlers.js @@ -0,0 +1,254 @@ +// site-handlers.js +// Registry for all site-specific handlers + +import * as RedditHandler from './reddit-handler.js'; + + +// Debug configuration +const DEBUG = true; + +// Debug logging +function debugLog(...args) { + if (DEBUG) { + console.log('[Site Handlers]', ...args); + } +} + +// Registry of all available site handlers +const handlers = { + // Reddit handler + reddit: { + name: 'Reddit', + module: RedditHandler, + domains: ['reddit.com'], + description: 'Automatically captures Reddit posts while browsing', + version: '1.0.0', + author: 'ArchiveBox' + } + + // Add more site handlers here following the same format + // For example: + /* + twitter: { + name: 'Twitter', + module: TwitterHandler, + domains: ['twitter.com', 'x.com'], + description: 'Captures tweets and 
threads', + version: '1.0.0', + author: 'ArchiveBox' + } + */ +}; + +/** + * Initialize all site handlers + */ +export async function initializeAll() { + debugLog('Initializing all site handlers'); + + // Check if site capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + + if (!enableScrollCapture) { + debugLog('Site capture is disabled, skipping initialization'); + return; + } + + // Initialize each handler + for (const [id, handler] of Object.entries(handlers)) { + if (handler.module && typeof handler.module.initialize === 'function') { + try { + debugLog(`Initializing ${handler.name} handler`); + await handler.module.initialize(); + } catch (error) { + console.error(`Error initializing ${handler.name} handler:`, error); + } + } + } + + debugLog('All site handlers initialized'); +} + +/** + * Get a specific handler by ID + */ +export function getHandler(handlerId) { + return handlers[handlerId]?.module; +} + +/** + * Find a handler for a specific URL + */ +export function findHandlerForUrl(url) { + try { + const hostname = new URL(url).hostname.toLowerCase(); + + for (const [id, handler] of Object.entries(handlers)) { + if (handler.domains.some(domain => hostname.includes(domain))) { + return { id, handler: handler.module }; + } + } + } catch (error) { + console.error('Error finding handler for URL:', error); + } + + return null; +} + +/** + * Handle capture request from content script + */ +export async function handleCaptureRequest(entry, tabId) { + const handlerResult = findHandlerForUrl(entry.url); + + if (handlerResult) { + debugLog(`Using ${handlerResult.id} handler for ${entry.url}`); + + if (entry.priority === 'high' && typeof handlerResult.handler.captureHighPriority === 'function') { + return handlerResult.handler.captureHighPriority(entry, tabId); + } else if (typeof handlerResult.handler.captureNormal === 'function') { + return handlerResult.handler.captureNormal(entry, tabId); + } + } + + // No 
specific handler found, use generic method + debugLog(`No specific handler for ${entry.url}, using generic method`); + return saveGenericEntry(entry); +} + +/** + * Save a generic entry + */ +async function saveGenericEntry(entry) { + try { + if (!entry || !entry.url) { + return { success: false, reason: 'Invalid entry' }; + } + + // Get current entries + const { entries = [] } = await chrome.storage.local.get('entries'); + + // Check for duplicates + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + return url; + } + }; + + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (existingEntry) { + return { success: false, reason: 'URL already exists' }; + } + + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? 
/**
 * Extract a site name from a URL for tagging.
 *
 * Takes the URL's host name, drops a leading "www." label and a trailing
 * ".com", ".org", ".net", ".io", ".gov" or ".edu" suffix, and returns what is
 * left as a single-element tag list.
 *
 * @param {string} url - URL to derive the tag from.
 * @returns {string[]} One-element array with the site tag, or an empty array
 *   when the URL cannot be parsed or has no host name (e.g. `file:` URLs).
 */
function getSiteTags(url) {
  try {
    const { hostname } = new URL(url);
    const domain = hostname
      // Anchor the prefix: a string pattern would strip the first "www."
      // found anywhere in the host (e.g. inside "awww.example.com").
      .replace(/^www\./, '')
      .replace(/\.(com|org|net|io|gov|edu)$/, '');

    // Never emit an empty-string tag for host-less URLs.
    return domain ? [domain] : [];
  } catch (e) {
    return [];
  }
}
/**
 * Get all handlers.
 * Returns the complete registry of site handlers with their metadata.
 *
 * The `handlers` object itself is returned, not a copy, so any change a
 * caller makes to it is visible to every other user of the registry.
 *
 * @returns {Object} The registry object, keyed by handler id.
 */
export function getAllHandlers() {
  return handlers;
}
/**
 * Trim a collection down to its newest `maxSize` items.
 *
 * Only Arrays and Sets are trimmed; the newest items are taken to be the ones
 * at the end (insertion order for a Set). Anything else, a collection already
 * within the limit, or a call with a non-numeric / non-positive `maxSize` is
 * handed back untouched (same reference).
 *
 * @param {Array|Set} collection - The collection to limit
 * @param {number} maxSize - Maximum size allowed
 * @returns {Array|Set} - The original collection, or a new trimmed Array/Set
 */
export function limitCollectionSize(collection, maxSize) {
  const limitIsUsable = typeof maxSize === 'number' && !(maxSize <= 0);
  if (!collection || !limitIsUsable) {
    return collection;
  }

  const isSet = collection instanceof Set;
  if (!isSet && !Array.isArray(collection)) {
    return collection;
  }

  const currentSize = isSet ? collection.size : collection.length;
  if (currentSize <= maxSize) {
    return collection;
  }

  // Keep the tail: the newest items live at the end.
  const newest = (isSet ? Array.from(collection) : collection).slice(-maxSize);
  return isSet ? new Set(newest) : newest;
}
/**
 * Persist capture configuration and broadcast the change to open tabs.
 *
 * The configuration is written to `chrome.storage.local` first; afterwards a
 * `captureConfigChanged` message carrying the same object is sent to every tab
 * returned by `chrome.tabs.query({})`. Delivery is fire-and-forget: the sends
 * are not awaited and any failure for an individual tab is ignored.
 *
 * @param {Object} config - Configuration to save
 * @returns {Promise<void>}
 */
export async function saveCaptureConfig(config) {
  // Sends the change notice to one tab and discards an asynchronous rejection.
  const notifyTab = (tabId) => {
    const pending = chrome.tabs.sendMessage(tabId, {
      type: 'captureConfigChanged',
      config
    });
    pending.catch(() => {/* Ignore errors for tabs that don't have content scripts */});
  };

  await chrome.storage.local.set(config);

  // Notify tabs about configuration changes
  for (const tab of await chrome.tabs.query({})) {
    try {
      notifyTab(tab.id);
    } catch (e) {
      // Ignore errors for tabs that don't have content scripts
    }
  }
}