From 5a8644c87109343aebaf78929bc974f0215ba973 Mon Sep 17 00:00:00 2001
From: Tuxx
Date: Wed, 19 Mar 2025 03:02:17 +0100
Subject: [PATCH 1/3] Add automatic content capture on scroll for Reddit

Feature proposal.

This commit introduces a new feature that automatically captures content as
users scroll through websites, starting with Reddit as a proof of concept.

The implementation:
- Adds a background script that monitors page navigation to Reddit domains
- Injects content capture functionality that detects Reddit posts in the viewport
- Creates a throttled queue system to prevent excessive API calls
- Maintains a status indicator to show users when content is being captured
- Adds configuration options in the settings UI to enable/disable the feature
- Allows users to specify custom tags for auto-captured content
- Implements automatic domain-based tagging
- Prevents duplicate entries with URL normalization
- Adds visual feedback with a status indicator during capture

This feature aims to effortlessly build your personal archive while browsing,
capturing valuable content without requiring manual clicks. Reddit serves as
the initial implementation, but the architecture can be extended to support
other infinite-scrolling sites like Twitter, Facebook, YouTube, etc.

Technical details: - Uses mutation observers and scroll event listeners to detect new content - Maintains a processed elements set to avoid duplicate captures - Implements throttling to manage API request frequency - Adds user configuration options in the options page --- background.js | 422 ++++++++++++++++++++++++++++++++++++++++++++++++-- config-tab.js | 23 ++- manifest.json | 5 +- options.html | 34 ++++ 4 files changed, 466 insertions(+), 18 deletions(-) diff --git a/background.js b/background.js index 0c06e9e..fd72997 100755 --- a/background.js +++ b/background.js @@ -2,15 +2,106 @@ import { addToArchiveBox } from "./utils.js"; -chrome.runtime.onMessage.addListener(async (message) => { - const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`; - console.log('i ArchiveBox Collector showing options.html', options_url); - if (message.action === 'openOptionsPage') { - await chrome.tabs.create({ url: options_url }); +// Queue for managing entry saving +const entrySaveQueue = []; +let processingQueue = false; + +// Process the save queue +function processEntrySaveQueue() { + if (entrySaveQueue.length === 0) { + processingQueue = false; + return; + } + + processingQueue = true; + const entry = entrySaveQueue.shift(); + + // Process entry + chrome.storage.local.get(['entries', 'enableScrollCapture'], (result) => { + // Only save entries if automatic capture is enabled + if (!result.enableScrollCapture) { + setTimeout(processEntrySaveQueue, 200); + return; } + + const entries = result.entries || []; + + // Normalize URLs for more accurate comparison + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + // Remove trailing slashes, query parameters, and fragment + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + return url; + } + }; + + const normalizedEntryUrl = normalizeUrl(entry.url); + + // Check if this URL already exists in our entries (use normalized URLs) + const 
existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + if (existingEntry) { + setTimeout(processEntrySaveQueue, 200); + return; + } + + // Add custom tags if configured + chrome.storage.local.get(['scrollCaptureTags'], (tagResult) => { + const customTags = tagResult.scrollCaptureTags ? + tagResult.scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Extract site tags + const siteTags = getSiteTags(entry.url); + + // Create the full entry object + const fullEntry = { + id: crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])], + title: entry.title || 'Captured content', + notes: `Auto-captured content: ${entry.url}` + }; + + entries.push(fullEntry); + + chrome.storage.local.set({ entries }, () => { + // Process next item after a delay - increased for better throttling + setTimeout(processEntrySaveQueue, 500); + }); + }); }); +} + +chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + // Handle opening options page + if (message.action === 'openOptionsPage') { + const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`; + chrome.tabs.create({ url: options_url }); + } + + // Handle archivebox_add + if (message.type === 'archivebox_add') { + addToArchiveBox(message.body, sendResponse, sendResponse); + return true; // Keep the message channel open for the async response + } + + // Handle content capture + if (message.type === 'capture') { + saveEntry(message.entry); + sendResponse({ success: true }); + } + + return true; // Indicate async response +}); chrome.action.onClicked.addListener(async (tab) => { + // Don't try to execute script on chrome:// URLs + if (tab.url.startsWith('chrome://')) { + return; + } + const entry = { id: crypto.randomUUID(), url: tab.url, @@ -25,25 +116,24 @@ chrome.action.onClicked.addListener(async (tab) => { entries.push(entry); await 
chrome.storage.local.set({ entries }); - // Inject scripts - CSS now handled in popup.js + // Inject scripts await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] + }).catch(err => { + console.error('Error injecting script:', err); }); }); -chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { - if (message.type === 'archivebox_add') { - addToArchiveBox(message.body, sendResponse, sendResponse); - } - return true; -}); - - chrome.contextMenus.onClicked.addListener(onClickContextMenuSave); // A generic onclick callback function. async function onClickContextMenuSave(item, tab) { + // Don't try to execute script on chrome:// URLs + if (tab.url.startsWith('chrome://')) { + return; + } + const entry = { id: crypto.randomUUID(), url: tab.url, @@ -58,15 +148,317 @@ async function onClickContextMenuSave(item, tab) { entries.push(entry); await chrome.storage.local.set({ entries }); - // Inject scripts - CSS now handled in popup.js + // Inject scripts await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] + }).catch(err => { + console.error('Error injecting script:', err); }); } + chrome.runtime.onInstalled.addListener(function () { chrome.contextMenus.create({ id: 'save_to_archivebox_ctxmenu', title: 'Save to ArchiveBox', }); }); + +// Replace the saveEntry function with this throttled version +function saveEntry(entry) { + // Don't save if no URL + if (!entry || !entry.url) return; + + // Add to queue + entrySaveQueue.push(entry); + + // Start processing if not already running + if (!processingQueue) { + processEntrySaveQueue(); + } +} + +// Extract site name for tagging +function getSiteTags(url) { + try { + const hostname = new URL(url).hostname; + const domain = hostname + .replace('www.', '') + .replace('.com', '') + .replace('.org', '') + .replace('.net', ''); + return [domain]; + } catch (e) { + return []; + } +} + +// Setup content capture for Reddit +function setupContentCapture() { 
+ // Setup page load detection + chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { + // Only run once the page is fully loaded + if (changeInfo.status !== 'complete') return; + + // Only run on Reddit + if (!tab.url.includes('reddit.com')) return; + + // Execute the content script immediately after page load + chrome.scripting.executeScript({ + target: {tabId: tabId}, + function: setupPageCapture + }).catch(err => { + console.error('Error setting up page capture:', err); + }); + }); +} + +// Call this function when the extension starts +chrome.runtime.onStartup.addListener(setupContentCapture); +chrome.runtime.onInstalled.addListener(setupContentCapture); + +// This function sets up the content capture on Reddit pages +function setupPageCapture() { + console.log('[ArchiveBox] Setting up page capture'); + + // Skip if already set up + if (window.archiveBoxSetupComplete) return; + window.archiveBoxSetupComplete = true; + + // Create tracking set if it doesn't exist + if (!window.archiveBoxProcessedElements) { + window.archiveBoxProcessedElements = new Set(); + } + + // Create a queue for captured entries to throttle submissions + if (!window.archiveBoxCaptureQueue) { + window.archiveBoxCaptureQueue = []; + } + + // Setup throttled submission process + if (!window.archiveBoxProcessingQueue) { + window.archiveBoxProcessingQueue = false; + + function processQueue() { + if (window.archiveBoxCaptureQueue.length === 0) { + window.archiveBoxProcessingQueue = false; + return; + } + + window.archiveBoxProcessingQueue = true; + const entry = window.archiveBoxCaptureQueue.shift(); + + chrome.runtime.sendMessage({ + type: 'capture', + entry: entry + }, () => { + // Add timeout for throttling + setTimeout(processQueue, 500); + }); + } + + // Function to add to queue and start processing if needed + window.queueCaptureEntry = (entry) => { + // Avoid duplicate entries in the queue by URL + if (!window.archiveBoxCaptureQueue.some(item => item.url === entry.url)) { + 
window.archiveBoxCaptureQueue.push(entry); + + // Start queue processing if not already running + if (!window.archiveBoxProcessingQueue) { + processQueue(); + } + } + }; + } + + // Create status indicator if it doesn't exist + if (!document.getElementById('archiveBoxStatusIndicator')) { + const indicator = document.createElement('div'); + indicator.id = 'archiveBoxStatusIndicator'; + indicator.style.cssText = ` + position: fixed; + bottom: 10px; + right: 10px; + background: rgba(0, 0, 0, 0.7); + color: white; + padding: 5px 10px; + border-radius: 5px; + font-size: 12px; + z-index: 9999; + transition: opacity 0.5s; + opacity: 0; + `; + document.body.appendChild(indicator); + } + + // Function to show status + window.showArchiveBoxStatus = (message) => { + const indicator = document.getElementById('archiveBoxStatusIndicator'); + if (indicator) { + indicator.textContent = `ArchiveBox: ${message}`; + indicator.style.opacity = '1'; + setTimeout(() => { + indicator.style.opacity = '0'; + }, 2000); + } + }; + + // Improved scroll event listener with throttling + let scrollTimeout = null; + window.addEventListener('scroll', () => { + // Cancel any pending scan + if (scrollTimeout) clearTimeout(scrollTimeout); + + // Schedule a new scan after user stops scrolling for 300ms + scrollTimeout = setTimeout(() => { + scanVisiblePosts(); + }, 300); + }); + + // Add mutation observer to detect new Reddit posts dynamically added to the page + const observeNewContent = () => { + const targetNode = document.body; + + // Observer configuration + const config = { + childList: true, + subtree: true, + attributes: false + }; + + // Callback to be executed when mutations are observed + const callback = function(mutationsList, observer) { + let hasNewPosts = false; + + for (const mutation of mutationsList) { + if (mutation.type === 'childList' && mutation.addedNodes.length) { + // Check if any added nodes contain potential Reddit posts + for (const node of mutation.addedNodes) { + if 
(node.nodeType === Node.ELEMENT_NODE) { + // Check for new content: either the node is a post or contains posts + if ( + (node.tagName === 'SHREDDIT-POST') || + (node.querySelector && ( + node.querySelector('shreddit-post') || + node.querySelector('.thing.link') + )) + ) { + hasNewPosts = true; + break; + } + } + } + } + + if (hasNewPosts) break; + } + + // Only scan if we detected new posts being added + if (hasNewPosts) { + // Use a small delay to ensure the DOM is fully updated + setTimeout(() => { + scanVisiblePosts(); + }, 100); + } + }; + + // Create an observer instance linked to the callback function + const observer = new MutationObserver(callback); + + // Start observing the target node for configured mutations + observer.observe(targetNode, config); + }; + + // Start the mutation observer + observeNewContent(); + + // Do initial scan + console.log('[ArchiveBox] Performing initial scan'); + // Small delay for initial scan to ensure the page is fully loaded + setTimeout(() => { + scanVisiblePosts(); + }, 300); + + function scanVisiblePosts() { + // Process shreddit-post elements (new Reddit) + scanElements('shreddit-post', (post) => { + const permalink = post.getAttribute('permalink'); + const postTitle = post.getAttribute('post-title'); + const subredditName = post.getAttribute('subreddit-prefixed-name'); + + if (permalink) { + const fullUrl = permalink.startsWith('http') ? 
+ permalink : `https://www.reddit.com${permalink}`; + + // Extract subreddit from prefixed name (r/subreddit) + let subreddit = ''; + if (subredditName && subredditName.startsWith('r/')) { + subreddit = subredditName.substring(2); + } + + return { + url: fullUrl, + title: postTitle || document.title, + tags: ['reddit', subreddit] + }; + } + return null; + }); + + // Process .thing.link elements (old Reddit) + scanElements('.thing.link', (post) => { + const permalink = post.getAttribute('data-permalink'); + if (permalink) { + const fullUrl = `https://www.reddit.com${permalink}`; + const title = post.querySelector('.title a')?.textContent || ''; + const subreddit = post.getAttribute('data-subreddit') || ''; + + return { + url: fullUrl, + title: title, + tags: ['reddit', subreddit] + }; + } + return null; + }); + } + + function scanElements(selector, extractFn) { + const elements = document.querySelectorAll(selector); + if (elements.length > 0) { + console.log(`[ArchiveBox] Found ${elements.length} elements matching '${selector}'`); + } + + Array.from(elements).forEach(element => { + // Skip already processed elements + if (window.archiveBoxProcessedElements.has(element)) return; + + // Check if the element is visible in the viewport + const rect = element.getBoundingClientRect(); + const isVisible = ( + rect.top >= 0 && + rect.left >= 0 && + rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) && + rect.right <= (window.innerWidth || document.documentElement.clientWidth) + ); + + // Only process visible elements + if (!isVisible) return; + + // Extract entry data + const entry = extractFn(element); + if (!entry) return; + + // Mark as processed + window.archiveBoxProcessedElements.add(element); + + // Add to throttled queue instead of sending immediately + window.queueCaptureEntry(entry); + + // Show status + window.showArchiveBoxStatus(`Captured: ${entry.title.substring(0, 30)}...`); + + console.log(`[ArchiveBox] Queued for capture: 
${entry.url}`); + }); + } +} diff --git a/config-tab.js b/config-tab.js index 42a897c..fdeec6f 100755 --- a/config-tab.js +++ b/config-tab.js @@ -18,7 +18,7 @@ export async function initializeConfigTab() { console.log('Got config values from storage:', archivebox_server_url, archivebox_api_key, match_urls, exclude_urls); // migrate old config_archiveboxBaseUrl to archivebox_server_url - const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveboxBaseUrl', ); + const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveBoxBaseUrl', ); if (config_archiveBoxBaseUrl) { await chrome.storage.local.set({ archivebox_server_url: config_archiveBoxBaseUrl }); } @@ -209,7 +209,7 @@ export async function initializeConfigTab() { }; const result = await syncToArchiveBox(testEntry); - document.getElementById('inprogress-test').remove(); + document.getElementById('inprogress-test')?.remove(); if (result.ok) { testStatus.innerHTML += ` @@ -241,6 +241,25 @@ export async function initializeConfigTab() { testButton.click(); } }); + + //Load scroll capture settings + const enableScrollCapture = document.getElementById('enableScrollCapture'); + const scrollCaptureTags = document.getElementById('scrollCaptureTags'); + + const { enableScrollCapture: savedEnableScrollCapture, scrollCaptureTags: savedScrollCaptureTags } = + await chrome.storage.local.get(['enableScrollCapture', 'scrollCaptureTags']); + + enableScrollCapture.checked = !!savedEnableScrollCapture; + scrollCaptureTags.value = savedScrollCaptureTags || ''; + + // Add event handlers for scroll capture settings + enableScrollCapture.addEventListener('change', async () => { + await chrome.storage.local.set({ enableScrollCapture: enableScrollCapture.checked }); + }); + + scrollCaptureTags.addEventListener('change', async () => { + await chrome.storage.local.set({ scrollCaptureTags: scrollCaptureTags.value }); + }); } // Using shared syncToArchiveBox function from utils.js diff --git 
a/manifest.json b/manifest.json index 4f32a09..4d5364d 100755 --- a/manifest.json +++ b/manifest.json @@ -8,7 +8,10 @@ "scripting", "activeTab", "contextMenus", - "unlimitedStorage" + "unlimitedStorage", + "webRequest", + "tabs", + "webNavigation" ], "optional_permissions": [ "cookies", diff --git a/options.html b/options.html index 73f27e6..d753c64 100755 --- a/options.html +++ b/options.html @@ -299,6 +299,40 @@
Advanced Users Only: Auto-archive URLs

+ +
+
Reddit Content Capture
+
+ + +
+
+ When enabled, the extension will automatically detect and save Reddit posts as you browse them. Posts are captured as they become visible in your viewport. +
+ +
+ + +
+ +
+
Reddit Content Capture Features:
+
    +
  • ✓ Automatic capture of posts visible in your viewport
  • +
  • ✓ Detection of new content as you scroll
  • +
  • ✓ Capturing metadata including subreddit name
  • +
  • ✓ Throttled content capture to prevent performance issues
  • +
  • ✓ Visual indicator when content is captured
  • +
+ + The extension uses a mutation observer to detect new posts as they load during infinite scrolling. + +
+
+ From 6a745b3e57744d58bb1b0fcc31c7a035067e4464 Mon Sep 17 00:00:00 2001 From: Tuxx Date: Thu, 20 Mar 2025 01:52:02 +0100 Subject: [PATCH 2/3] fix: resolve Reddit content capture issues This commit fixes several issues with the Reddit content capture functionality: 1. Storage Access Error: Removed direct chrome.storage.session calls from the injected script context, using window variables instead to avoid "Access to storage is not allowed from this context" errors. 2. Browser Restart Detection: Added setupExistingTabs() function to detect and initialize Reddit tabs that were already open when the extension starts or reloads. 3. Small Window Detection: Improved post visibility detection to use a more forgiving algorithm that captures posts partially visible in the viewport, fixing issues with small browser windows. 4. Added Message Passing: Implemented chrome.runtime.sendMessage for configuration values instead of direct storage access. 5. Improved Status Indicator: Enhanced the status display to show multiple captured posts at once with a counter. 6. Toggleable Debug Logging: Added a DEBUG_MODE constant to easily enable/disable diagnostic logs. These changes improve reliability of the Reddit capture system across browser restarts and different window sizes while maintaining the original functionality. 
--- background.js | 504 +++++++++++++++++++++++++++++++++++++++----------- manifest.json | 4 +- 2 files changed, 394 insertions(+), 114 deletions(-) diff --git a/background.js b/background.js index fd72997..49193bc 100755 --- a/background.js +++ b/background.js @@ -2,6 +2,15 @@ import { addToArchiveBox } from "./utils.js"; +// Debug configuration +const DEBUG_MODE = false; // Easy toggle for debugging + +function debugLog(...args) { + if (DEBUG_MODE) { + console.log('[ArchiveBox Debug]', ...args); + } +} + // Queue for managing entry saving const entrySaveQueue = []; let processingQueue = false; @@ -10,21 +19,25 @@ let processingQueue = false; function processEntrySaveQueue() { if (entrySaveQueue.length === 0) { processingQueue = false; + debugLog('Queue empty, stopping processor'); return; } processingQueue = true; const entry = entrySaveQueue.shift(); + debugLog('Processing entry from queue:', entry.url); // Process entry chrome.storage.local.get(['entries', 'enableScrollCapture'], (result) => { // Only save entries if automatic capture is enabled if (!result.enableScrollCapture) { + debugLog('Automatic content capture disabled, not saving entry'); setTimeout(processEntrySaveQueue, 200); return; } const entries = result.entries || []; + debugLog('Current entries count:', entries.length); // Normalize URLs for more accurate comparison const normalizeUrl = (url) => { @@ -33,26 +46,37 @@ function processEntrySaveQueue() { // Remove trailing slashes, query parameters, and fragment return normalized.origin + normalized.pathname.replace(/\/$/, ''); } catch (e) { + debugLog('URL normalization error:', e); return url; } }; const normalizedEntryUrl = normalizeUrl(entry.url); + debugLog('Normalized URL:', normalizedEntryUrl); // Check if this URL already exists in our entries (use normalized URLs) const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); if (existingEntry) { + debugLog('URL already exists in entries, skipping:', entry.url); 
setTimeout(processEntrySaveQueue, 200); return; } // Add custom tags if configured - chrome.storage.local.get(['scrollCaptureTags'], (tagResult) => { + chrome.storage.local.get(['scrollCaptureTags', 'archivebox_server_url', 'archivebox_api_key'], (tagResult) => { + debugLog('Server configuration:', { + serverUrl: tagResult.archivebox_server_url || 'Not configured', + apiKeySet: tagResult.archivebox_api_key ? 'Yes' : 'No' + }); + const customTags = tagResult.scrollCaptureTags ? tagResult.scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + debugLog('Custom tags:', customTags); + // Extract site tags const siteTags = getSiteTags(entry.url); + debugLog('Site tags:', siteTags); // Create the full entry object const fullEntry = { @@ -64,9 +88,11 @@ function processEntrySaveQueue() { notes: `Auto-captured content: ${entry.url}` }; + debugLog('Saving new entry:', fullEntry); entries.push(fullEntry); chrome.storage.local.set({ entries }, () => { + debugLog('Entry saved to local storage'); // Process next item after a delay - increased for better throttling setTimeout(processEntrySaveQueue, 500); }); @@ -75,30 +101,46 @@ function processEntrySaveQueue() { } chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { + debugLog('Message received:', message.type || message.action); + // Handle opening options page if (message.action === 'openOptionsPage') { const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`; + debugLog('Opening options page:', options_url); chrome.tabs.create({ url: options_url }); } // Handle archivebox_add if (message.type === 'archivebox_add') { + debugLog('ArchiveBox add request'); addToArchiveBox(message.body, sendResponse, sendResponse); return true; // Keep the message channel open for the async response } // Handle content capture if (message.type === 'capture') { + debugLog('Capture request received:', message.entry.url); saveEntry(message.entry); sendResponse({ success: true }); } + + // Add the 
new handler for getEnableStatus + if (message.type === 'getEnableStatus') { + chrome.storage.local.get(['enableScrollCapture'], (result) => { + sendResponse({ enableScrollCapture: !!result.enableScrollCapture }); + }); + return true; // Keep the message channel open for async response + } return true; // Indicate async response }); chrome.action.onClicked.addListener(async (tab) => { + debugLog('Extension icon clicked on tab:', tab.url); + // Don't try to execute script on chrome:// URLs if (tab.url.startsWith('chrome://')) { + debugLog('Cannot execute on chrome:// URL, skipping'); return; } @@ -111,12 +153,16 @@ chrome.action.onClicked.addListener(async (tab) => { favicon: tab.favIconUrl }; + debugLog('Created entry from tab click:', entry); + // Save the entry first const { entries = [] } = await chrome.storage.local.get('entries'); entries.push(entry); await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); // Inject scripts + debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] @@ -129,8 +175,11 @@ chrome.contextMenus.onClicked.addListener(onClickContextMenuSave); // A generic onclick callback function. 
async function onClickContextMenuSave(item, tab) { + debugLog('Context menu save clicked for tab:', tab.url); + // Don't try to execute script on chrome:// URLs if (tab.url.startsWith('chrome://')) { + debugLog('Cannot execute on chrome:// URL, skipping'); return; } @@ -143,12 +192,16 @@ async function onClickContextMenuSave(item, tab) { favicon: tab.favIconUrl }; + debugLog('Created entry from context menu:', entry); + // Save the entry first const { entries = [] } = await chrome.storage.local.get('entries'); entries.push(entry); await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); // Inject scripts + debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, files: ['popup.js'] @@ -158,6 +211,7 @@ async function onClickContextMenuSave(item, tab) { } chrome.runtime.onInstalled.addListener(function () { + debugLog('Extension installed or updated'); chrome.contextMenus.create({ id: 'save_to_archivebox_ctxmenu', title: 'Save to ArchiveBox', @@ -167,13 +221,19 @@ chrome.runtime.onInstalled.addListener(function () { // Replace the saveEntry function with this throttled version function saveEntry(entry) { // Don't save if no URL - if (!entry || !entry.url) return; + if (!entry || !entry.url) { + debugLog('Invalid entry, not saving', entry); + return; + } + + debugLog('Queueing entry for saving:', entry.url); // Add to queue entrySaveQueue.push(entry); // Start processing if not already running if (!processingQueue) { + debugLog('Starting queue processor'); processEntrySaveQueue(); } } @@ -189,12 +249,14 @@ function getSiteTags(url) { .replace('.net', ''); return [domain]; } catch (e) { + debugLog('Error extracting site tags:', e); return []; } } // Setup content capture for Reddit function setupContentCapture() { + debugLog('Setting up content capture listeners'); // Setup page load detection chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { // Only run once the page is 
fully loaded @@ -203,76 +265,119 @@ function setupContentCapture() { // Only run on Reddit if (!tab.url.includes('reddit.com')) return; + debugLog('Reddit page loaded, initializing capture:', tab.url); + // Execute the content script immediately after page load chrome.scripting.executeScript({ target: {tabId: tabId}, function: setupPageCapture }).catch(err => { console.error('Error setting up page capture:', err); + debugLog('Error details:', { + message: err.message, + tabUrl: tab.url, + tabId: tabId + }); }); }); } // Call this function when the extension starts -chrome.runtime.onStartup.addListener(setupContentCapture); -chrome.runtime.onInstalled.addListener(setupContentCapture); +chrome.runtime.onStartup.addListener(() => { + debugLog('Extension started'); + setupContentCapture(); + + // Check for existing Reddit tabs + chrome.tabs.query({url: "*://*.reddit.com/*"}, (tabs) => { + debugLog(`Found ${tabs.length} existing Reddit tabs`); + + tabs.forEach(tab => { + debugLog(`Setting up Reddit capture on existing tab: ${tab.id} - ${tab.url}`); + chrome.scripting.executeScript({ + target: {tabId: tab.id}, + function: setupPageCapture + }).catch(err => { + console.error('Error setting up page capture on existing tab:', err); + }); + }); + }); +}); + +chrome.runtime.onInstalled.addListener(() => { + debugLog('Extension installed'); + setupContentCapture(); +}); // This function sets up the content capture on Reddit pages function setupPageCapture() { - console.log('[ArchiveBox] Setting up page capture'); - - // Skip if already set up - if (window.archiveBoxSetupComplete) return; - window.archiveBoxSetupComplete = true; - - // Create tracking set if it doesn't exist - if (!window.archiveBoxProcessedElements) { - window.archiveBoxProcessedElements = new Set(); + // Local logging function + function localLog(message, data) { + console.log('[ArchiveBox]', message, data || ''); } + + localLog('Setting up page capture', { + url: window.location.href, + windowSize: 
`${window.innerWidth}x${window.innerHeight}` + }); - // Create a queue for captured entries to throttle submissions - if (!window.archiveBoxCaptureQueue) { - window.archiveBoxCaptureQueue = []; + // Use window variables instead of chrome.storage for state tracking + if (window.archiveBoxSetupComplete) { + localLog('Setup already completed for this tab'); + scanVisiblePosts(); + return; } + // Mark as setup complete using window variable + window.archiveBoxSetupComplete = true; + window.archiveBoxProcessedElements = new Set(); + window.archiveBoxCaptureQueue = []; + window.archiveBoxStatusQueue = []; + + localLog('Performing initial setup'); + // Setup throttled submission process - if (!window.archiveBoxProcessingQueue) { - window.archiveBoxProcessingQueue = false; - - function processQueue() { - if (window.archiveBoxCaptureQueue.length === 0) { - window.archiveBoxProcessingQueue = false; - return; - } - - window.archiveBoxProcessingQueue = true; - const entry = window.archiveBoxCaptureQueue.shift(); - - chrome.runtime.sendMessage({ - type: 'capture', - entry: entry - }, () => { - // Add timeout for throttling - setTimeout(processQueue, 500); - }); + window.archiveBoxProcessingQueue = false; + + function processQueue() { + if (window.archiveBoxCaptureQueue.length === 0) { + window.archiveBoxProcessingQueue = false; + localLog('Capture queue empty, stopping processor'); + return; } - // Function to add to queue and start processing if needed - window.queueCaptureEntry = (entry) => { - // Avoid duplicate entries in the queue by URL - if (!window.archiveBoxCaptureQueue.some(item => item.url === entry.url)) { - window.archiveBoxCaptureQueue.push(entry); - - // Start queue processing if not already running - if (!window.archiveBoxProcessingQueue) { - processQueue(); - } - } - }; + window.archiveBoxProcessingQueue = true; + const entry = window.archiveBoxCaptureQueue.shift(); + localLog('Processing from capture queue:', entry.url); + + chrome.runtime.sendMessage({ + 
type: 'capture', + entry: entry + }, () => { + // Add timeout for throttling + setTimeout(processQueue, 500); + }); } - // Create status indicator if it doesn't exist + // Function to add to queue and start processing if needed + window.queueCaptureEntry = (entry) => { + // Avoid duplicate entries in the queue by URL + if (!window.archiveBoxCaptureQueue.some(item => item.url === entry.url)) { + localLog('Adding to capture queue:', entry.url); + window.archiveBoxCaptureQueue.push(entry); + + // Start queue processing if not already running + if (!window.archiveBoxProcessingQueue) { + localLog('Starting capture queue processor'); + processQueue(); + } + } else { + localLog('URL already in queue, skipping:', entry.url); + } + }; + + // Create enhanced status indicator if it doesn't exist if (!document.getElementById('archiveBoxStatusIndicator')) { + localLog('Creating status indicator'); const indicator = document.createElement('div'); indicator.id = 'archiveBoxStatusIndicator'; indicator.style.cssText = ` @@ -281,26 +386,95 @@ function setupPageCapture() { right: 10px; background: rgba(0, 0, 0, 0.7); color: white; - padding: 5px 10px; + padding: 10px; border-radius: 5px; font-size: 12px; z-index: 9999; transition: opacity 0.5s; opacity: 0; + max-width: 300px; + max-height: 200px; + overflow-y: auto; + line-height: 1.3; `; document.body.appendChild(indicator); + + // Create a container for the message list + const messageContainer = document.createElement('div'); + messageContainer.id = 'archiveBoxStatusMessages'; + indicator.appendChild(messageContainer); + + // Create a count indicator + const countIndicator = document.createElement('div'); + countIndicator.id = 'archiveBoxStatusCount'; + countIndicator.style.cssText = ` + margin-top: 5px; + font-weight: bold; + text-align: center; + border-top: 1px solid rgba(255, 255, 255, 0.3); + padding-top: 5px; + `; + indicator.appendChild(countIndicator); } - // Function to show status + // Improved function to show multiple 
status messages window.showArchiveBoxStatus = (message) => { const indicator = document.getElementById('archiveBoxStatusIndicator'); - if (indicator) { - indicator.textContent = `ArchiveBox: ${message}`; - indicator.style.opacity = '1'; + const messageContainer = document.getElementById('archiveBoxStatusMessages'); + const countIndicator = document.getElementById('archiveBoxStatusCount'); + + if (!indicator || !messageContainer || !countIndicator) { + localLog('Status indicator elements not found'); + return; + } + + // Add this message to the queue + if (!window.archiveBoxStatusQueue) window.archiveBoxStatusQueue = []; + window.archiveBoxStatusQueue.push(message); + localLog('Added to status queue:', message); + + // Limit queue to last 5 items + if (window.archiveBoxStatusQueue.length > 5) { + window.archiveBoxStatusQueue.shift(); + } + + // Update the messages display + messageContainer.innerHTML = window.archiveBoxStatusQueue.map(msg => + `
• ${msg}
` + ).join(''); + + // Update count + countIndicator.textContent = `Captured ${window.archiveBoxStatusQueue.length} posts`; + + // Show the indicator + indicator.style.opacity = '1'; + + // Hide after a longer delay to account for multiple captures + clearTimeout(window.archiveBoxStatusTimeout); + window.archiveBoxStatusTimeout = setTimeout(() => { + indicator.style.opacity = '0'; + // Clear the queue after hiding setTimeout(() => { - indicator.style.opacity = '0'; - }, 2000); + window.archiveBoxStatusQueue = []; + }, 500); + }, 3000); + }; + + // Store processed elements in window variables + window.markElementAsProcessed = (elementId) => { + if (!window.archiveBoxProcessedElements) window.archiveBoxProcessedElements = new Set(); + window.archiveBoxProcessedElements.add(elementId); + localLog('Marked as processed:', elementId); + }; + + // Check if element is processed + window.isElementProcessed = (elementId) => { + if (!window.archiveBoxProcessedElements) return false; + const isProcessed = window.archiveBoxProcessedElements.has(elementId); + if (isProcessed) { + localLog('Element already processed, skipping:', elementId); } + return isProcessed; }; // Improved scroll event listener with throttling @@ -311,10 +485,20 @@ function setupPageCapture() { // Schedule a new scan after user stops scrolling for 300ms scrollTimeout = setTimeout(() => { + localLog('Scroll detected, scanning visible posts'); scanVisiblePosts(); }, 300); }); + // Handle window resize events to capture posts that become visible + window.addEventListener('resize', () => { + if (window.archiveBoxResizeTimer) clearTimeout(window.archiveBoxResizeTimer); + window.archiveBoxResizeTimer = setTimeout(() => { + localLog('Window resized, scanning for newly visible posts'); + scanVisiblePosts(); + }, 500); + }); + // Add mutation observer to detect new Reddit posts dynamically added to the page const observeNewContent = () => { const targetNode = document.body; @@ -357,6 +541,7 @@ function 
setupPageCapture() { if (hasNewPosts) { // Use a small delay to ensure the DOM is fully updated setTimeout(() => { + localLog('Mutation observer detected new posts'); scanVisiblePosts(); }, 100); } @@ -367,98 +552,193 @@ function setupPageCapture() { // Start observing the target node for configured mutations observer.observe(targetNode, config); + localLog('Mutation observer started'); }; // Start the mutation observer observeNewContent(); - // Do initial scan - console.log('[ArchiveBox] Performing initial scan'); - // Small delay for initial scan to ensure the page is fully loaded + // Do initial scan with a small delay to ensure page is fully loaded + localLog('Performing initial scan'); setTimeout(() => { scanVisiblePosts(); }, 300); function scanVisiblePosts() { - // Process shreddit-post elements (new Reddit) - scanElements('shreddit-post', (post) => { - const permalink = post.getAttribute('permalink'); - const postTitle = post.getAttribute('post-title'); - const subredditName = post.getAttribute('subreddit-prefixed-name'); + // Check enable status by sending a message to background script + chrome.runtime.sendMessage({type: 'getEnableStatus'}, (response) => { + const isEnabled = response && response.enableScrollCapture; - if (permalink) { - const fullUrl = permalink.startsWith('http') ? 
- permalink : `https://www.reddit.com${permalink}`; - - // Extract subreddit from prefixed name (r/subreddit) - let subreddit = ''; - if (subredditName && subredditName.startsWith('r/')) { - subreddit = subredditName.substring(2); - } - - return { - url: fullUrl, - title: postTitle || document.title, - tags: ['reddit', subreddit] - }; + if (!isEnabled) { + localLog('Automatic content capture disabled, not scanning posts'); + return; } - return null; - }); - - // Process .thing.link elements (old Reddit) - scanElements('.thing.link', (post) => { - const permalink = post.getAttribute('data-permalink'); - if (permalink) { - const fullUrl = `https://www.reddit.com${permalink}`; - const title = post.querySelector('.title a')?.textContent || ''; - const subreddit = post.getAttribute('data-subreddit') || ''; + + localLog('Scanning visible posts, window size:', window.innerWidth, 'x', window.innerHeight); + + // Process shreddit-post elements (new Reddit) + scanElements('shreddit-post', (post) => { + const permalink = post.getAttribute('permalink'); + const postTitle = post.getAttribute('post-title'); + const subredditName = post.getAttribute('subreddit-prefixed-name'); - return { - url: fullUrl, - title: title, - tags: ['reddit', subreddit] - }; - } - return null; + if (permalink) { + const fullUrl = permalink.startsWith('http') ? 
+ permalink : `https://www.reddit.com${permalink}`; + + // Extract subreddit from prefixed name (r/subreddit) + let subreddit = ''; + if (subredditName && subredditName.startsWith('r/')) { + subreddit = subredditName.substring(2); + } + + localLog('Found Reddit post:', { + title: postTitle, + subreddit: subreddit, + url: fullUrl + }); + + return { + url: fullUrl, + title: postTitle || document.title, + tags: ['reddit', subreddit] + }; + } + return null; + }); + + // Process .thing.link elements (old Reddit) + scanElements('.thing.link', (post) => { + const permalink = post.getAttribute('data-permalink'); + if (permalink) { + const fullUrl = `https://www.reddit.com${permalink}`; + const title = post.querySelector('.title a')?.textContent || ''; + const subreddit = post.getAttribute('data-subreddit') || ''; + + localLog('Found Old Reddit post:', { + title: title, + subreddit: subreddit, + url: fullUrl + }); + + return { + url: fullUrl, + title: title, + tags: ['reddit', subreddit] + }; + } + return null; + }); }); } function scanElements(selector, extractFn) { const elements = document.querySelectorAll(selector); if (elements.length > 0) { - console.log(`[ArchiveBox] Found ${elements.length} elements matching '${selector}'`); + localLog(`Found ${elements.length} elements matching '${selector}'`); + } else { + localLog(`No elements found matching '${selector}'`); } Array.from(elements).forEach(element => { + // Generate a unique ID for this element if it doesn't have one + const elementId = element.id || `archivebox-${selector}-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; + if (!element.id) { + element.id = elementId; + localLog('Assigned ID to element:', elementId); + } + // Skip already processed elements - if (window.archiveBoxProcessedElements.has(element)) return; + if (window.isElementProcessed(elementId)) return; - // Check if the element is visible in the viewport + // Check if element is at least partially visible in viewport const rect = 
element.getBoundingClientRect(); - const isVisible = ( - rect.top >= 0 && - rect.left >= 0 && - rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) && - rect.right <= (window.innerWidth || document.documentElement.clientWidth) + + // New visibility check: ANY part of the element is visible + const isPartiallyVisible = ( + rect.bottom > 0 && + rect.right > 0 && + rect.top < (window.innerHeight || document.documentElement.clientHeight) && + rect.left < (window.innerWidth || document.documentElement.clientWidth) ); - // Only process visible elements - if (!isVisible) return; + // Only process partially visible elements + if (!isPartiallyVisible) { + localLog('Element not visible in viewport, skipping:', elementId); + return; + } + + localLog('Element visible in viewport:', elementId); // Extract entry data const entry = extractFn(element); - if (!entry) return; + if (!entry) { + localLog('Failed to extract entry data from element:', elementId); + return; + } - // Mark as processed - window.archiveBoxProcessedElements.add(element); + // Mark as processed using new method + window.markElementAsProcessed(elementId); - // Add to throttled queue instead of sending immediately + // Add to throttled queue window.queueCaptureEntry(entry); - // Show status - window.showArchiveBoxStatus(`Captured: ${entry.title.substring(0, 30)}...`); + // Show status with improved status indicator + window.showArchiveBoxStatus(`${entry.title.substring(0, 40)}...`); - console.log(`[ArchiveBox] Queued for capture: ${entry.url}`); + localLog(`Queued for capture: ${entry.url} (window size: ${window.innerWidth}x${window.innerHeight})`); }); } } + +// This global event listener ensures we capture Reddit posts after page load +chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { + // Only process complete loads on Reddit + if (changeInfo.status !== 'complete' || !tab.url.includes('reddit.com')) return; + + debugLog('Reddit tab updated to complete:', tab.url); + + 
// Wait a moment for the page to fully render + setTimeout(() => { + debugLog('Executing setupPageCapture after delay'); + chrome.scripting.executeScript({ + target: {tabId: tabId}, + function: setupPageCapture + }).catch(err => { + console.error('Error setting up page capture:', err); + debugLog('Error setting up page capture:', { + message: err.message, + tabId: tabId, + url: tab.url + }); + }); + }, 1500); +}); + +// Handle existing Reddit tabs on startup or install +function setupExistingTabs() { + debugLog('Checking for existing Reddit tabs'); + + chrome.tabs.query({url: "*://*.reddit.com/*"}, (tabs) => { + debugLog(`Found ${tabs.length} existing Reddit tabs`); + + tabs.forEach(tab => { + debugLog(`Setting up Reddit capture on existing tab: ${tab.id} - ${tab.url}`); + chrome.scripting.executeScript({ + target: {tabId: tab.id}, + function: setupPageCapture + }).catch(err => { + console.error('Error setting up page capture on existing tab:', err); + debugLog('Error details:', { + message: err.message, + tabId: tab.id, + url: tab.url + }); + }); + }); + }); +} + +// Call this function when the extension starts or is installed +chrome.runtime.onStartup.addListener(setupExistingTabs); +chrome.runtime.onInstalled.addListener(setupExistingTabs); diff --git a/manifest.json b/manifest.json index 4d5364d..d85a0b3 100755 --- a/manifest.json +++ b/manifest.json @@ -18,8 +18,8 @@ "history", "bookmarks" ], - "optional_host_permissions": [ - "*://*\/*" + "host_permissions": [ + "<all_urls>" ], "icons": { "16": "16.png", From f789cd3ef9348d27271266bdf428d7dbf7dc6a30 Mon Sep 17 00:00:00 2001 From: Tuxx Date: Thu, 20 Mar 2025 03:57:25 +0100 Subject: [PATCH 3/3] Refactor content capture architecture for improved reliability and maintainability This commit introduces a major architectural overhaul to the content capture system: - Created a modular site handler system to support multiple sites - Extracted Reddit-specific logic into dedicated reddit-handler.js - Implemented memory management 
with configurable limits - Added enhanced user controls for site-specific settings - Improved performance with better throttling and queuing - Added detailed capture statistics for monitoring - Enhanced UI with site detection and filtering - Fixed resource usage issues by limiting stored data - Improved error handling and recovery - Fixed syntax errors with proper async/await usage in background.js and options.js - Fixed pattern escaping issue in manifest.json web_accessible_resources This refactoring addresses key architectural issues: 1. Tight coupling between components 2. Excessive resource usage 3. Limited user configuration 4. Poor maintainability Known issues that still need to be addressed: Security vulnerabilities: - XSS risks in entries-tab.js and popup.js due to unsanitized string interpolation - Path traversal risk in reddit-handler.js URL normalization - Missing secure context verification in site-handlers.js Performance issues: - Memory leaks in state.observedPosts and processedUrls sets with insufficient pruning - Inefficient DOM operations in entries-tab.js causing full re-renders on each filter change - Redundant storage operations in reddit-handler.js (saving every 50 items) - Overly broad mutation observer in reddit-content.js Browser compatibility issues: - Missing API availability checks for features like navigator.clipboard.writeText() - Chrome-specific APIs used without fallbacks - CSS vendor prefixes missing in popup.js Architecture issues: - Multiple components with direct dependencies on same storage keys - Inconsistent error handling across files - Callback patterns that could be improved with async/await - HTML structure dependencies without validation Specific bugs: - Race conditions in site-handlers.js concurrent operations - Missing permissions verification in several features - Unclosed observers in reddit-content.js - Unsafe URL parsing without proper error handling The new architecture is more extensible, allowing for easier 
addition of new site handlers in the future, but these issues will need to be addressed in subsequent commits. --- background.js | 974 ++++++++++++++++++++-------------------------- config-tab.js | 261 ++++++++++++- entries-tab.js | 212 ++++++++-- manifest.json | 10 +- options.html | 59 ++- options.js | 7 +- popup.js | 96 +++++ reddit-content.js | 365 +++++++++++++++++ reddit-handler.js | 593 ++++++++++++++++++++++++++++ site-handlers.js | 254 ++++++++++++ utils.js | 135 +++++++ 11 files changed, 2358 insertions(+), 608 deletions(-) create mode 100644 reddit-content.js create mode 100644 reddit-handler.js create mode 100644 site-handlers.js diff --git a/background.js b/background.js index 49193bc..1055a88 100755 --- a/background.js +++ b/background.js @@ -1,9 +1,16 @@ // background.js import { addToArchiveBox } from "./utils.js"; +import * as RedditHandler from "./reddit-handler.js"; // Debug configuration -const DEBUG_MODE = false; // Easy toggle for debugging +const DEBUG_MODE = true; // Set to true to see debugging info + +// Configuration +const CONFIG = { + MAX_ENTRIES: 10000, // Maximum number of entries to store locally + STATUS_DISPLAY_TIME: 3000 // Time in ms to show status indicators +}; function debugLog(...args) { if (DEBUG_MODE) { @@ -11,95 +18,59 @@ function debugLog(...args) { } } -// Queue for managing entry saving -const entrySaveQueue = []; -let processingQueue = false; +// State management - sites handlers registry +const siteHandlers = { + reddit: RedditHandler +}; -// Process the save queue -function processEntrySaveQueue() { - if (entrySaveQueue.length === 0) { - processingQueue = false; - debugLog('Queue empty, stopping processor'); - return; +// Content capture configuration +let captureEnabled = false; + +// Initialize background script +async function initialize() { + debugLog('Initializing background script'); + + // Load configuration + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + 
captureEnabled = !!enableScrollCapture; + + // Initialize site handlers + if (captureEnabled) { + debugLog('Content capture is enabled, initializing handlers'); + Object.values(siteHandlers).forEach(handler => { + if (typeof handler.initialize === 'function') { + handler.initialize(); + } + }); } - processingQueue = true; - const entry = entrySaveQueue.shift(); - debugLog('Processing entry from queue:', entry.url); - - // Process entry - chrome.storage.local.get(['entries', 'enableScrollCapture'], (result) => { - // Only save entries if automatic capture is enabled - if (!result.enableScrollCapture) { - debugLog('Automatic content capture disabled, not saving entry'); - setTimeout(processEntrySaveQueue, 200); - return; - } - - const entries = result.entries || []; - debugLog('Current entries count:', entries.length); - - // Normalize URLs for more accurate comparison - const normalizeUrl = (url) => { - try { - const normalized = new URL(url); - // Remove trailing slashes, query parameters, and fragment - return normalized.origin + normalized.pathname.replace(/\/$/, ''); - } catch (e) { - debugLog('URL normalization error:', e); - return url; - } - }; - - const normalizedEntryUrl = normalizeUrl(entry.url); - debugLog('Normalized URL:', normalizedEntryUrl); - - // Check if this URL already exists in our entries (use normalized URLs) - const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); - if (existingEntry) { - debugLog('URL already exists in entries, skipping:', entry.url); - setTimeout(processEntrySaveQueue, 200); - return; - } - - // Add custom tags if configured - chrome.storage.local.get(['scrollCaptureTags', 'archivebox_server_url', 'archivebox_api_key'], (tagResult) => { - debugLog('Server configuration:', { - serverUrl: tagResult.archivebox_server_url || 'Not configured', - apiKeySet: tagResult.archivebox_api_key ? 'Yes' : 'No' - }); - - const customTags = tagResult.scrollCaptureTags ? 
- tagResult.scrollCaptureTags.split(',').map(tag => tag.trim()) : []; - - debugLog('Custom tags:', customTags); - - // Extract site tags - const siteTags = getSiteTags(entry.url); - debugLog('Site tags:', siteTags); + // Check all existing tabs to find any supported site tabs already open + chrome.tabs.query({}, (tabs) => { + if (captureEnabled) { + debugLog(`Found ${tabs.length} existing tabs, checking for supported sites`); - // Create the full entry object - const fullEntry = { - id: crypto.randomUUID(), - url: entry.url, - timestamp: entry.timestamp || new Date().toISOString(), - tags: ['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])], - title: entry.title || 'Captured content', - notes: `Auto-captured content: ${entry.url}` - }; - - debugLog('Saving new entry:', fullEntry); - entries.push(fullEntry); - - chrome.storage.local.set({ entries }, () => { - debugLog('Entry saved to local storage'); - // Process next item after a delay - increased for better throttling - setTimeout(processEntrySaveQueue, 500); + // Check each tab for supported sites + tabs.forEach(tab => { + if (tab.url) { + Object.entries(siteHandlers).forEach(([site, handler]) => { + if (handler.shouldCaptureUrl && handler.shouldCaptureUrl(tab.url)) { + debugLog(`Found existing ${site} tab:`, tab.url); + if (handler.injectContentScript) { + handler.injectContentScript(tab.id); + } + } + }); + } }); - }); + } }); + + debugLog('Background script initialized'); } +/** + * Listens for messages from content scripts and popup + */ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { debugLog('Message received:', message.type || message.action); @@ -120,11 +91,38 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { // Handle content capture if (message.type === 'capture') { debugLog('Capture request received:', message.entry.url); - saveEntry(message.entry); + + if (!captureEnabled) { + debugLog('Content capture is disabled, ignoring capture 
request'); + sendResponse({ success: false, reason: 'Capture disabled' }); + return true; + } + + // Determine site handler based on URL or tags + const url = message.entry.url; + let handled = false; + + // Check if it's from Reddit + if (message.entry.tags.includes('reddit') || url.includes('reddit.com')) { + if (message.entry.priority === 'high') { + // Use high priority capture for viewport posts + RedditHandler.captureHighPriority(message.entry, sender.tab?.id); + } else { + // Let reddit handler decide what to do + RedditHandler.queueForCapture(message.entry, sender.tab?.id, 'normal'); + } + handled = true; + } + + // Generic handling for other sites or if no specific handler was found + if (!handled) { + saveEntry(message.entry); + } + sendResponse({ success: true }); } - // Add the new handler for getEnableStatus + // Enable status requests if (message.type === 'getEnableStatus') { chrome.storage.local.get(['enableScrollCapture'], (result) => { sendResponse({ enableScrollCapture: !!result.enableScrollCapture }); @@ -132,9 +130,118 @@ chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { return true; // Keep the message channel open for async response } + // Show status notification in tabs + if (message.type === 'showStatus') { + const tabId = message.tabId || (sender.tab && sender.tab.id); + if (tabId) { + try { + showStatusInTab(tabId, message.message, message.count, message.immediate); + } catch (err) { + debugLog('Error showing status:', err); + } + } + sendResponse({ success: true }); + } + + // Get site handler stats + if (message.type === 'getStats') { + const stats = {}; + Object.entries(siteHandlers).forEach(([site, handler]) => { + if (handler.getStats) { + stats[site] = handler.getStats(); + } + }); + sendResponse({ stats }); + return true; + } + if (message.type === 'getSiteHandlerForUrl') { + try { + const url = message.url; + const handlerResult = findHandlerForUrl(url); + + if (handlerResult) { + const { id, handler } = 
handlerResult; + const handlers = getAllHandlers(); + const handlerInfo = handlers[id]; + + sendResponse({ + found: true, + handler: { + id, + name: handlerInfo.name, + description: handlerInfo.description, + version: handlerInfo.version + } + }); + } else { + sendResponse({ found: false }); + } + } catch (error) { + console.error('Error finding handler for URL:', error); + sendResponse({ found: false, error: error.message }); + } + return true; + } + + // Get all site handlers + if (message.type === 'getSiteHandlers') { + try { + const handlers = getAllHandlers(); + sendResponse({ handlers }); + } catch (error) { + console.error('Error getting site handlers:', error); + sendResponse({ handlers: {} }); + } + return true; + } + + // URL visited notification + if (message.type === 'urlVisited') { + try { + const url = message.url; + const handlerResult = findHandlerForUrl(url); + + if (handlerResult && typeof handlerResult.handler.onUrlVisited === 'function') { + handlerResult.handler.onUrlVisited(url); + } + + sendResponse({ success: true }); + } catch (error) { + console.error('Error handling URL visit:', error); + sendResponse({ success: false }); + } + return true; + } + + // Configuration change notification + if (message.type === 'captureConfigChanged') { + try { + const { config } = message; + + // Update enabled state + captureEnabled = !!config.enableScrollCapture; + + // Notify handlers + Object.values(siteHandlers).forEach(handler => { + if (typeof handler.onConfigChanged === 'function') { + handler.onConfigChanged(config); + } + }); + + sendResponse({ success: true }); + } catch (error) { + console.error('Error handling config change:', error); + sendResponse({ success: false }); + } + return true; + } + return true; // Indicate async response }); +/** + * Handle click on extension icon + */ chrome.action.onClicked.addListener(async (tab) => { debugLog('Extension icon clicked on tab:', tab.url); @@ -161,7 +268,7 @@ 
chrome.action.onClicked.addListener(async (tab) => { await chrome.storage.local.set({ entries }); debugLog('Entry saved to local storage'); - // Inject scripts + // Inject popup script debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, @@ -171,10 +278,10 @@ chrome.action.onClicked.addListener(async (tab) => { }); }); -chrome.contextMenus.onClicked.addListener(onClickContextMenuSave); - -// A generic onclick callback function. -async function onClickContextMenuSave(item, tab) { +/** + * Handle context menu click + */ +chrome.contextMenus.onClicked.addListener(async function(item, tab) { debugLog('Context menu save clicked for tab:', tab.url); // Don't try to execute script on chrome:// URLs @@ -200,7 +307,7 @@ async function onClickContextMenuSave(item, tab) { await chrome.storage.local.set({ entries }); debugLog('Entry saved to local storage'); - // Inject scripts + // Inject popup script debugLog('Injecting popup script into tab'); await chrome.scripting.executeScript({ target: { tabId: tab.id }, @@ -208,45 +315,182 @@ async function onClickContextMenuSave(item, tab) { }).catch(err => { console.error('Error injecting script:', err); }); -} +}); +/** + * Handle extension installation and updates + */ chrome.runtime.onInstalled.addListener(function () { debugLog('Extension installed or updated'); + + // Create context menu chrome.contextMenus.create({ id: 'save_to_archivebox_ctxmenu', title: 'Save to ArchiveBox', }); + + // Set up configuration defaults + initializeConfiguration(); + + // Initialize the extension + initialize(); }); -// Replace the saveEntry function with this throttled version -function saveEntry(entry) { - // Don't save if no URL - if (!entry || !entry.url) { - debugLog('Invalid entry, not saving', entry); - return; +/** + * Set up configuration defaults if needed + */ +async function initializeConfiguration() { + const config = await chrome.storage.local.get([ + 'archivebox_server_url', 
+ 'archivebox_api_key', + 'enableScrollCapture', + 'scrollCaptureTags' + ]); + + const updates = {}; + + // Set default values if undefined + if (config.archivebox_server_url === undefined) { + updates.archivebox_server_url = ''; } - debugLog('Queueing entry for saving:', entry.url); + if (config.archivebox_api_key === undefined) { + updates.archivebox_api_key = ''; + } - // Add to queue - entrySaveQueue.push(entry); + if (config.enableScrollCapture === undefined) { + updates.enableScrollCapture = false; + } - // Start processing if not already running - if (!processingQueue) { - debugLog('Starting queue processor'); - processEntrySaveQueue(); + if (config.scrollCaptureTags === undefined) { + updates.scrollCaptureTags = ''; + } + + // Save defaults if needed + if (Object.keys(updates).length > 0) { + await chrome.storage.local.set(updates); + debugLog('Set default config values:', updates); } } -// Extract site name for tagging +/** + * Handle new tab creation + */ +chrome.tabs.onCreated.addListener((tab) => { + // We'll check if it's a supported site tab once the navigation completes + debugLog('New tab created:', tab.id); +}); + +/** + * Handle tab navigation to detect supported sites + */ +chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => { + // Only react when the tab has completed loading and we have a URL + if (changeInfo.status === 'complete' && tab.url) { + // Check if content capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + captureEnabled = !!enableScrollCapture; + + if (captureEnabled) { + debugLog('Tab updated, checking for supported sites:', tab.url); + + // Check URL against each site handler + Object.entries(siteHandlers).forEach(([site, handler]) => { + if (handler.shouldCaptureUrl && handler.shouldCaptureUrl(tab.url)) { + debugLog(`Detected ${site} site in tab:`, tab.url); + if (handler.injectContentScript) { + handler.injectContentScript(tabId); + } + } + }); + } + } 
+}); + +/** + * Generic entry saving logic for any URL + */ +async function saveEntry(entry) { + try { + if (!entry || !entry.url) { + debugLog('Invalid entry, not saving', entry); + return { success: false, reason: 'Invalid entry' }; + } + + debugLog('Saving entry:', entry.url); + + // Get current entries + const { entries = [] } = await chrome.storage.local.get('entries'); + + // Check for duplicates + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + debugLog('URL normalization error:', e); + return url; + } + }; + + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (existingEntry) { + debugLog('URL already exists in entries, skipping:', entry.url); + return { success: false, reason: 'URL already exists' }; + } + + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? 
+ scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Extract site tags + const siteTags = getSiteTags(entry.url); + + // Create the full entry object + const fullEntry = { + id: entry.id || crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])], + title: entry.title || 'Captured content', + notes: entry.notes || `Auto-captured content: ${entry.url}`, + favicon: entry.favicon + }; + + // Add to entries + entries.push(fullEntry); + + // Limit entries if exceeding maximum + if (entries.length > CONFIG.MAX_ENTRIES) { + // Sort by timestamp (oldest first) and remove excess + entries.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp)); + const removed = entries.splice(0, entries.length - CONFIG.MAX_ENTRIES); + debugLog(`Removed ${removed.length} oldest entries to stay under limit`); + } + + // Save entries + await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage'); + + return { success: true }; + } catch (e) { + debugLog('Error saving entry:', e); + return { success: false, reason: e.message }; + } +} + +/** + * Extract site name for tagging + */ function getSiteTags(url) { try { const hostname = new URL(url).hostname; const domain = hostname .replace('www.', '') - .replace('.com', '') - .replace('.org', '') - .replace('.net', ''); + .replace(/\.(com|org|net|io|gov|edu)$/, ''); return [domain]; } catch (e) { debugLog('Error extracting site tags:', e); @@ -254,130 +498,83 @@ function getSiteTags(url) { } } -// Setup content capture for Reddit -function setupContentCapture() { - debugLog('Setting up content capture listeners'); - // Setup page load detection - chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { - // Only run once the page is fully loaded - if (changeInfo.status !== 'complete') return; - - // Only run on Reddit - if (!tab.url.includes('reddit.com')) return; - - 
debugLog('Reddit page loaded, initializing capture:', tab.url); - - // Execute the content script immediately after page load - chrome.scripting.executeScript({ - target: {tabId: tabId}, - function: setupPageCapture +/** + * Show status message in tab + */ +async function showStatusInTab(tabId, message, count, immediate = false) { + try { + // Check if tab still exists before proceeding + try { + const tab = await chrome.tabs.get(tabId); + if (!tab) { + debugLog(`Tab ${tabId} no longer exists, skipping status update`); + return; + } + } catch (e) { + debugLog(`Tab ${tabId} error or no longer exists:`, e.message); + return; + } + + // Setup status indicator if not already present + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + function: setupStatusIndicator, }).catch(err => { - console.error('Error setting up page capture:', err); - debugLog('Error details:', { - message: err.message, - tabUrl: tab.url, - tabId: tabId - }); + debugLog(`Error setting up status indicator in tab ${tabId}:`, err.message); + return; }); - }); -} - -// Call this function when the extension starts -chrome.runtime.onStartup.addListener(() => { - debugLog('Extension started'); - setupContentCapture(); - - // Check for existing Reddit tabs - chrome.tabs.query({url: "*://*.reddit.com/*"}, (tabs) => { - debugLog(`Found ${tabs.length} existing Reddit tabs`); - tabs.forEach(tab => { - debugLog(`Setting up Reddit capture on existing tab: ${tab.id} - ${tab.url}`); - chrome.scripting.executeScript({ - target: {tabId: tab.id}, - function: setupPageCapture - }).catch(err => { - console.error('Error setting up page capture on existing tab:', err); - }); + // Show the status message + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + args: [message, count || 0, immediate], + function: (message, count, immediate) => { + // Add to status queue + if (!window.archiveBoxStatusQueue) window.archiveBoxStatusQueue = []; + window.archiveBoxStatusQueue.unshift(message); + 
+ // Keep only 5 items + if (window.archiveBoxStatusQueue.length > 5) { + window.archiveBoxStatusQueue = window.archiveBoxStatusQueue.slice(0, 5); + } + + // Show status + const indicator = document.getElementById('archiveBoxStatusIndicator'); + const messageContainer = document.getElementById('archiveBoxStatusMessages'); + const countIndicator = document.getElementById('archiveBoxStatusCount'); + + if (indicator && messageContainer && countIndicator) { + // Update message list + messageContainer.innerHTML = window.archiveBoxStatusQueue.map(msg => + `
• ${msg}
` + ).join(''); + + // Update count + countIndicator.textContent = `Captured ${count} posts`; + + // Show indicator + indicator.style.opacity = '1'; + + // Auto hide + clearTimeout(window.archiveBoxStatusTimeout); + window.archiveBoxStatusTimeout = setTimeout(() => { + indicator.style.opacity = '0'; + }, 3000); + } + } + }).catch(err => { + debugLog(`Error showing status in tab ${tabId}:`, err.message); }); - }); -}); - -chrome.runtime.onInstalled.addListener(() => { - debugLog('Extension installed'); - setupContentCapture(); -}); - -// This function sets up the content capture on Reddit pages -function setupPageCapture() { - // Local logging function - function localLog(message, data) { - console.log('[ArchiveBox]', message, data || ''); + } catch (err) { + debugLog('Error showing status:', err); } +} - localLog('Setting up page capture', { - url: window.location.href, - windowSize: `${window.innerWidth}x${window.innerHeight}` - }); - - // Use window variables instead of chrome.storage for state tracking - if (window.archiveBoxSetupComplete) { - localLog('Setup already completed for this tab'); - scanVisiblePosts(); - return; - } - - // Mark as setup complete using window variable - window.archiveBoxSetupComplete = true; - window.archiveBoxProcessedElements = new Set(); - window.archiveBoxCaptureQueue = []; - window.archiveBoxStatusQueue = []; - - localLog('Performing initial setup'); - - // Setup throttled submission process - window.archiveBoxProcessingQueue = false; - - function processQueue() { - if (window.archiveBoxCaptureQueue.length === 0) { - window.archiveBoxProcessingQueue = false; - localLog('Capture queue empty, stopping processor'); - return; - } - - window.archiveBoxProcessingQueue = true; - const entry = window.archiveBoxCaptureQueue.shift(); - localLog('Processing from capture queue:', entry.url); - - chrome.runtime.sendMessage({ - type: 'capture', - entry: entry - }, () => { - // Add timeout for throttling - setTimeout(processQueue, 500); - }); - 
} - - // Function to add to queue and start processing if needed - window.queueCaptureEntry = (entry) => { - // Avoid duplicate entries in the queue by URL - if (!window.archiveBoxCaptureQueue.some(item => item.url === entry.url)) { - localLog('Adding to capture queue:', entry.url); - window.archiveBoxCaptureQueue.push(entry); - - // Start queue processing if not already running - if (!window.archiveBoxProcessingQueue) { - localLog('Starting capture queue processor'); - processQueue(); - } - } else { - localLog('URL already in queue, skipping:', entry.url); - } - }; - - // Create enhanced status indicator if it doesn't exist +/** + * Setup status indicator in tab + */ +function setupStatusIndicator() { if (!document.getElementById('archiveBoxStatusIndicator')) { - localLog('Creating status indicator'); const indicator = document.createElement('div'); indicator.id = 'archiveBoxStatusIndicator'; indicator.style.cssText = ` @@ -415,330 +612,15 @@ function setupPageCapture() { padding-top: 5px; `; indicator.appendChild(countIndicator); - } - - // Improved function to show multiple status messages - window.showArchiveBoxStatus = (message) => { - const indicator = document.getElementById('archiveBoxStatusIndicator'); - const messageContainer = document.getElementById('archiveBoxStatusMessages'); - const countIndicator = document.getElementById('archiveBoxStatusCount'); - - if (!indicator || !messageContainer || !countIndicator) { - localLog('Status indicator elements not found'); - return; - } - // Add this message to the queue - if (!window.archiveBoxStatusQueue) window.archiveBoxStatusQueue = []; - window.archiveBoxStatusQueue.push(message); - localLog('Added to status queue:', message); - - // Limit queue to last 5 items - if (window.archiveBoxStatusQueue.length > 5) { - window.archiveBoxStatusQueue.shift(); - } - - // Update the messages display - messageContainer.innerHTML = window.archiveBoxStatusQueue.map(msg => - `
• ${msg}
` - ).join(''); - - // Update count - countIndicator.textContent = `Captured ${window.archiveBoxStatusQueue.length} posts`; - - // Show the indicator - indicator.style.opacity = '1'; - - // Hide after a longer delay to account for multiple captures - clearTimeout(window.archiveBoxStatusTimeout); - window.archiveBoxStatusTimeout = setTimeout(() => { - indicator.style.opacity = '0'; - // Clear the queue after hiding - setTimeout(() => { - window.archiveBoxStatusQueue = []; - }, 500); - }, 3000); - }; - - // Store processed elements in window variables - window.markElementAsProcessed = (elementId) => { - if (!window.archiveBoxProcessedElements) window.archiveBoxProcessedElements = new Set(); - window.archiveBoxProcessedElements.add(elementId); - localLog('Marked as processed:', elementId); - }; - - // Check if element is processed - window.isElementProcessed = (elementId) => { - if (!window.archiveBoxProcessedElements) return false; - const isProcessed = window.archiveBoxProcessedElements.has(elementId); - if (isProcessed) { - localLog('Element already processed, skipping:', elementId); - } - return isProcessed; - }; - - // Improved scroll event listener with throttling - let scrollTimeout = null; - window.addEventListener('scroll', () => { - // Cancel any pending scan - if (scrollTimeout) clearTimeout(scrollTimeout); - - // Schedule a new scan after user stops scrolling for 300ms - scrollTimeout = setTimeout(() => { - localLog('Scroll detected, scanning visible posts'); - scanVisiblePosts(); - }, 300); - }); - - // Handle window resize events to capture posts that become visible - window.addEventListener('resize', () => { - if (window.archiveBoxResizeTimer) clearTimeout(window.archiveBoxResizeTimer); - window.archiveBoxResizeTimer = setTimeout(() => { - localLog('Window resized, scanning for newly visible posts'); - scanVisiblePosts(); - }, 500); - }); - - // Add mutation observer to detect new Reddit posts dynamically added to the page - const observeNewContent = () 
=> { - const targetNode = document.body; - - // Observer configuration - const config = { - childList: true, - subtree: true, - attributes: false - }; - - // Callback to be executed when mutations are observed - const callback = function(mutationsList, observer) { - let hasNewPosts = false; - - for (const mutation of mutationsList) { - if (mutation.type === 'childList' && mutation.addedNodes.length) { - // Check if any added nodes contain potential Reddit posts - for (const node of mutation.addedNodes) { - if (node.nodeType === Node.ELEMENT_NODE) { - // Check for new content: either the node is a post or contains posts - if ( - (node.tagName === 'SHREDDIT-POST') || - (node.querySelector && ( - node.querySelector('shreddit-post') || - node.querySelector('.thing.link') - )) - ) { - hasNewPosts = true; - break; - } - } - } - } - - if (hasNewPosts) break; - } - - // Only scan if we detected new posts being added - if (hasNewPosts) { - // Use a small delay to ensure the DOM is fully updated - setTimeout(() => { - localLog('Mutation observer detected new posts'); - scanVisiblePosts(); - }, 100); - } - }; - - // Create an observer instance linked to the callback function - const observer = new MutationObserver(callback); - - // Start observing the target node for configured mutations - observer.observe(targetNode, config); - localLog('Mutation observer started'); - }; - - // Start the mutation observer - observeNewContent(); - - // Do initial scan with a small delay to ensure page is fully loaded - localLog('Performing initial scan'); - setTimeout(() => { - scanVisiblePosts(); - }, 300); - - function scanVisiblePosts() { - // Check enable status by sending a message to background script - chrome.runtime.sendMessage({type: 'getEnableStatus'}, (response) => { - const isEnabled = response && response.enableScrollCapture; - - if (!isEnabled) { - localLog('Automatic content capture disabled, not scanning posts'); - return; - } - - localLog('Scanning visible posts, window 
size:', window.innerWidth, 'x', window.innerHeight); - - // Process shreddit-post elements (new Reddit) - scanElements('shreddit-post', (post) => { - const permalink = post.getAttribute('permalink'); - const postTitle = post.getAttribute('post-title'); - const subredditName = post.getAttribute('subreddit-prefixed-name'); - - if (permalink) { - const fullUrl = permalink.startsWith('http') ? - permalink : `https://www.reddit.com${permalink}`; - - // Extract subreddit from prefixed name (r/subreddit) - let subreddit = ''; - if (subredditName && subredditName.startsWith('r/')) { - subreddit = subredditName.substring(2); - } - - localLog('Found Reddit post:', { - title: postTitle, - subreddit: subreddit, - url: fullUrl - }); - - return { - url: fullUrl, - title: postTitle || document.title, - tags: ['reddit', subreddit] - }; - } - return null; - }); - - // Process .thing.link elements (old Reddit) - scanElements('.thing.link', (post) => { - const permalink = post.getAttribute('data-permalink'); - if (permalink) { - const fullUrl = `https://www.reddit.com${permalink}`; - const title = post.querySelector('.title a')?.textContent || ''; - const subreddit = post.getAttribute('data-subreddit') || ''; - - localLog('Found Old Reddit post:', { - title: title, - subreddit: subreddit, - url: fullUrl - }); - - return { - url: fullUrl, - title: title, - tags: ['reddit', subreddit] - }; - } - return null; - }); - }); - } - - function scanElements(selector, extractFn) { - const elements = document.querySelectorAll(selector); - if (elements.length > 0) { - localLog(`Found ${elements.length} elements matching '${selector}'`); - } else { - localLog(`No elements found matching '${selector}'`); - } - - Array.from(elements).forEach(element => { - // Generate a unique ID for this element if it doesn't have one - const elementId = element.id || `archivebox-${selector}-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; - if (!element.id) { - element.id = elementId; - 
localLog('Assigned ID to element:', elementId); - } - - // Skip already processed elements - if (window.isElementProcessed(elementId)) return; - - // Check if element is at least partially visible in viewport - const rect = element.getBoundingClientRect(); - - // New visibility check: ANY part of the element is visible - const isPartiallyVisible = ( - rect.bottom > 0 && - rect.right > 0 && - rect.top < (window.innerHeight || document.documentElement.clientHeight) && - rect.left < (window.innerWidth || document.documentElement.clientWidth) - ); - - // Only process partially visible elements - if (!isPartiallyVisible) { - localLog('Element not visible in viewport, skipping:', elementId); - return; - } - - localLog('Element visible in viewport:', elementId); - - // Extract entry data - const entry = extractFn(element); - if (!entry) { - localLog('Failed to extract entry data from element:', elementId); - return; - } - - // Mark as processed using new method - window.markElementAsProcessed(elementId); - - // Add to throttled queue - window.queueCaptureEntry(entry); - - // Show status with improved status indicator - window.showArchiveBoxStatus(`${entry.title.substring(0, 40)}...`); - - localLog(`Queued for capture: ${entry.url} (window size: ${window.innerWidth}x${window.innerHeight})`); - }); + // Initialize status queue + window.archiveBoxStatusQueue = []; } } -// This global event listener ensures we capture Reddit posts after page load -chrome.tabs.onUpdated.addListener((tabId, changeInfo, tab) => { - // Only process complete loads on Reddit - if (changeInfo.status !== 'complete' || !tab.url.includes('reddit.com')) return; - - debugLog('Reddit tab updated to complete:', tab.url); - - // Wait a moment for the page to fully render - setTimeout(() => { - debugLog('Executing setupPageCapture after delay'); - chrome.scripting.executeScript({ - target: {tabId: tabId}, - function: setupPageCapture - }).catch(err => { - console.error('Error setting up page capture:', err); 
- debugLog('Error setting up page capture:', { - message: err.message, - tabId: tabId, - url: tab.url - }); - }); - }, 1500); +// Initialize on startup +chrome.runtime.onStartup.addListener(() => { + debugLog('Extension started'); + initialize(); }); -// Handle existing Reddit tabs on startup or install -function setupExistingTabs() { - debugLog('Checking for existing Reddit tabs'); - - chrome.tabs.query({url: "*://*.reddit.com/*"}, (tabs) => { - debugLog(`Found ${tabs.length} existing Reddit tabs`); - - tabs.forEach(tab => { - debugLog(`Setting up Reddit capture on existing tab: ${tab.id} - ${tab.url}`); - chrome.scripting.executeScript({ - target: {tabId: tab.id}, - function: setupPageCapture - }).catch(err => { - console.error('Error setting up page capture on existing tab:', err); - debugLog('Error details:', { - message: err.message, - tabId: tab.id, - url: tab.url - }); - }); - }); - }); -} - -// Call this function when the extension starts or is installed -chrome.runtime.onStartup.addListener(setupExistingTabs); -chrome.runtime.onInstalled.addListener(setupExistingTabs); diff --git a/config-tab.js b/config-tab.js index fdeec6f..7635215 100755 --- a/config-tab.js +++ b/config-tab.js @@ -1,5 +1,6 @@ // Config tab initialization and handlers import { updateStatusIndicator, syncToArchiveBox, getArchiveBoxServerUrl } from './utils.js'; +import { getAllHandlers, getAllStats } from './site-handlers.js'; export async function initializeConfigTab() { const configForm = document.getElementById('configForm'); @@ -15,7 +16,6 @@ export async function initializeConfigTab() { 'match_urls', 'exclude_urls', ]); - console.log('Got config values from storage:', archivebox_server_url, archivebox_api_key, match_urls, exclude_urls); // migrate old config_archiveboxBaseUrl to archivebox_server_url const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveBoxBaseUrl', ); @@ -242,12 +242,27 @@ export async function initializeConfigTab() { } }); - //Load scroll 
capture settings + // Initialize site-specific capture settings + await initializeSiteCapture(); +} + +/** + * Initialize site-specific capture settings + */ +async function initializeSiteCapture() { + // Load scroll capture settings const enableScrollCapture = document.getElementById('enableScrollCapture'); const scrollCaptureTags = document.getElementById('scrollCaptureTags'); - const { enableScrollCapture: savedEnableScrollCapture, scrollCaptureTags: savedScrollCaptureTags } = - await chrome.storage.local.get(['enableScrollCapture', 'scrollCaptureTags']); + const { + enableScrollCapture: savedEnableScrollCapture, + scrollCaptureTags: savedScrollCaptureTags, + redditCaptureConfig + } = await chrome.storage.local.get([ + 'enableScrollCapture', + 'scrollCaptureTags', + 'redditCaptureConfig' + ]); enableScrollCapture.checked = !!savedEnableScrollCapture; scrollCaptureTags.value = savedScrollCaptureTags || ''; @@ -255,11 +270,249 @@ export async function initializeConfigTab() { // Add event handlers for scroll capture settings enableScrollCapture.addEventListener('change', async () => { await chrome.storage.local.set({ enableScrollCapture: enableScrollCapture.checked }); + + // Notify all tabs of the change + const tabs = await chrome.tabs.query({}); + for (const tab of tabs) { + try { + chrome.tabs.sendMessage(tab.id, { + type: 'captureStatusChanged', + enabled: enableScrollCapture.checked + }).catch(() => {/* Ignore errors for tabs that don't have the content script */}); + } catch (e) { + // Ignore errors for tabs that don't have the content script + } + } }); scrollCaptureTags.addEventListener('change', async () => { await chrome.storage.local.set({ scrollCaptureTags: scrollCaptureTags.value }); }); + + // Initialize Reddit-specific settings + await initializeRedditSettings(redditCaptureConfig); + + // Add site handlers information + populateSiteHandlersInfo(); + + // Add capture stats display + await updateCaptureStats(); + + // Set up stats refresh button + 
document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats); +} + +/** + * Initialize Reddit-specific settings + */ +async function initializeRedditSettings(savedConfig) { + // Default configuration + const defaultConfig = { + captureSubreddits: true, + capturePostDetails: true, + captureComments: false, + commentsDepth: 2, + excludedSubreddits: [], + includedSubreddits: [], + maxProcessedPosts: 1000 + }; + + // Merge saved config with defaults + const config = { ...defaultConfig, ...(savedConfig || {}) }; + + // Create Reddit-specific settings UI if it doesn't exist + const redditSettingsContainer = document.getElementById('redditSettingsContainer'); + if (!redditSettingsContainer) { + return; // Element doesn't exist, can't add settings + } + + // Build the Reddit settings UI + redditSettingsContainer.innerHTML = ` +
+
+
Reddit Capture Settings
+
+
+
+
+
+ + +
+
+ + +
+
+ + +
+
+
+
+ + +
+
+ + +
Maximum number of post IDs to keep in memory (100-10000)
+
+
+
+ +
+
+ + +
Only posts from these subreddits will be captured
+
+
+ + +
Posts from these subreddits will never be captured
+
+
+ + +
+
+ `; + + // Add event listener for saving settings + document.getElementById('saveRedditSettings').addEventListener('click', async () => { + // Collect the current settings + const newConfig = { + captureSubreddits: document.getElementById('redditCaptureSubreddits').checked, + capturePostDetails: document.getElementById('redditCapturePostDetails').checked, + captureComments: document.getElementById('redditCaptureComments').checked, + commentsDepth: parseInt(document.getElementById('redditCommentsDepth').value, 10), + maxProcessedPosts: parseInt(document.getElementById('redditMaxProcessedPosts').value, 10), + includedSubreddits: document.getElementById('redditIncludedSubreddits').value + .split(',') + .map(s => s.trim().toLowerCase()) + .filter(s => s), + excludedSubreddits: document.getElementById('redditExcludedSubreddits').value + .split(',') + .map(s => s.trim().toLowerCase()) + .filter(s => s) + }; + + // Validate settings + if (newConfig.maxProcessedPosts < 100) newConfig.maxProcessedPosts = 100; + if (newConfig.maxProcessedPosts > 10000) newConfig.maxProcessedPosts = 10000; + + // Save the settings + await chrome.storage.local.set({ redditCaptureConfig: newConfig }); + + // Show success message + alert('Reddit settings saved successfully'); + }); +} + +/** + * Populate site handlers information + */ +function populateSiteHandlersInfo() { + const handlersContainer = document.getElementById('siteHandlersContainer'); + if (!handlersContainer) return; + + const handlers = getAllHandlers(); + + // Create the handlers info UI + handlersContainer.innerHTML = ` +
+
+
Site Handlers
+
+
+ + + + + + + + + + + ${Object.entries(handlers).map(([id, handler]) => ` + + + + + + + `).join('')} + +
HandlerDomainsVersionDescription
${handler.name}${handler.domains.join(', ')}${handler.version}${handler.description}
+
+
+ `; +} + +/** + * Update capture stats + */ +async function updateCaptureStats() { + const statsContainer = document.getElementById('captureStatsContainer'); + if (!statsContainer) return; + + // Get stats from all handlers + const stats = await new Promise(resolve => { + chrome.runtime.sendMessage({ type: 'getStats' }, response => { + resolve(response?.stats || {}); + }); + }); + + // Create the stats UI + statsContainer.innerHTML = ` +
+
+
Capture Statistics
+ +
+
+
+ ${Object.entries(stats).map(([site, siteStats]) => ` +
+
+
+
${site.charAt(0).toUpperCase() + site.slice(1)} Stats
+
+
+
    + ${Object.entries(siteStats).map(([key, value]) => ` +
  • + ${key.replace(/([A-Z])/g, ' $1').replace(/^./, str => str.toUpperCase())} + ${value} +
  • + `).join('')} +
+
+
+
+ `).join('')} +
+
+
+ `; + + // Re-attach the refresh button event listener + document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats); } // Using shared syncToArchiveBox function from utils.js diff --git a/entries-tab.js b/entries-tab.js index 9481646..af02568 100755 --- a/entries-tab.js +++ b/entries-tab.js @@ -1,4 +1,38 @@ import { filterEntries, addToArchiveBox, downloadCsv, downloadJson, syncToArchiveBox, updateStatusIndicator, getArchiveBoxServerUrl } from './utils.js'; +import { getAllHandlers, shouldCaptureUrl } from './site-handlers.js'; + +/** + * Get site handler information for an entry + * @param {Object} entry - The entry to get handler info for + * @return {Object|null} Handler info if found + */ +async function getSiteHandlerForEntry(entry) { + if (!entry || !entry.url) return null; + + try { + // Send message to background script + return new Promise(resolve => { + chrome.runtime.sendMessage( + { type: 'getSiteHandlerForUrl', url: entry.url }, + response => resolve(response?.handler || null) + ); + }); + } catch (error) { + console.error('Error getting site handler for entry:', error); + return null; + } +} + +function getSiteHandlerIcon(handlerId) { + const icons = { + reddit: '💬', + twitter: '🐦', + youtube: '▶️', + default: '🌐' + }; + + return icons[handlerId] || icons.default; +} export async function renderEntries(filterText = '', tagFilter = '') { const { entries = [] } = await chrome.storage.local.get('entries'); @@ -16,23 +50,85 @@ export async function renderEntries(filterText = '', tagFilter = '') { // Display filtered entries const entriesList = document.getElementById('entriesList'); - entriesList.innerHTML = filteredEntries.map(entry => ` -
-
- - ${new Date(entry.timestamp).toISOString().replace('T', ' ').split('.')[0]} - -
- ${entry.url} -
-
- ${entry.tags.length ? ` -

- ${entry.tags.map(tag => - `${tag}` - ).join('')} -

- ` : ''} + // Add a custom style for site handler badges if not already present + if (!document.getElementById('siteHandlerStyles')) { + const style = document.createElement('style'); + style.id = 'siteHandlerStyles'; + style.textContent = ` + .site-handler-badge { + display: inline-flex; + align-items: center; + padding: 2px 6px; + font-size: 0.7rem; + background-color: #e3f2fd; + color: #0d6efd; + border-radius: 4px; + margin-right: 8px; + } + + .site-handler-icon { + margin-right: 2px; + } + `; + document.head.appendChild(style); + } + + // Get site handler info for each entry + const entryHandlers = await Promise.all( + filteredEntries.map(async entry => { + return { + entry, + handler: await getSiteHandlerForEntry(entry) + }; + }) + ); + + entriesList.innerHTML = entryHandlers.map(({ entry, handler }) => ` +
+ +
+
+
+ ${handler ? + ` + ${getSiteHandlerIcon(handler.id)} + ${handler.name} + ` : '' + } + ${entry.title || 'Untitled'} +
+ ${(()=>{ + return archivebox_server_url ? + `
+ + 🔗 Original + + + 📦 ArchiveBox + + + 🏛️ Archive.org + +
` + : '' })() + } +
+
+ + ${entry.url} + +
+
+ ${entry.tags.map(tag => + `${tag}` + ).join('')}
@@ -330,9 +426,58 @@ export function initializeEntriesTab() { window.history.pushState({}, '', newUrl); } + /** + * Render the tags list sidebar with frequency counts and site filters + * @param {Array} filteredEntries - The currently filtered entries + */ async function renderTagsList(filteredEntries) { const tagsList = document.getElementById('tagsList'); + // Add site handler filters + const handlers = getAllHandlers(); + + // Check if we have entries from supported sites + const siteCount = {}; + + filteredEntries.forEach(entry => { + Object.entries(handlers).forEach(([id, handler]) => { + if (handler.domains.some(domain => entry.url.includes(domain))) { + siteCount[id] = (siteCount[id] || 0) + 1; + } + }); + }); + + // Start with site filters if we have entries from supported sites + let tagsListHTML = ''; + + if (Object.keys(siteCount).length > 0) { + tagsListHTML += '
Sites
'; + + // Get current filter to highlight active site if any + const currentFilter = document.getElementById('filterInput').value.toLowerCase(); + + // Add site filters sorted by count + tagsListHTML += Object.entries(siteCount) + .sort(([, countA], [, countB]) => countB - countA) + .map(([siteId, count]) => { + const handler = handlers[siteId]; + const isActive = currentFilter === `site:${siteId}`; + + return ` + + + ${getSiteHandlerIcon(siteId)} ${handler.name} + + ${count} + + `; + }).join(''); + + tagsListHTML += '
Tags
'; + } + // Count occurrences of each tag in filtered entries only const tagCounts = filteredEntries.reduce((acc, entry) => { entry.tags.forEach(tag => { @@ -340,19 +485,19 @@ export function initializeEntriesTab() { }); return acc; }, {}); - + // Sort tags by frequency (descending) then alphabetically const sortedTags = Object.entries(tagCounts) .sort(([tagA, countA], [tagB, countB]) => { if (countB !== countA) return countB - countA; return tagA.localeCompare(tagB); }); - + // Get current filter to highlight active tag if any const currentFilter = document.getElementById('filterInput').value.toLowerCase(); - - // Render tags list with counts - tagsList.innerHTML = sortedTags.map(([tag, count]) => ` + + // Add tags with counts + tagsListHTML += sortedTags.map(([tag, count]) => ` @@ -360,7 +505,10 @@ export function initializeEntriesTab() { ${count} `).join(''); - + + // Set the HTML + tagsList.innerHTML = tagsListHTML; + // Add click handlers for tag filtering tagsList.querySelectorAll('.tag-filter').forEach(tagElement => { tagElement.addEventListener('click', (e) => { @@ -378,6 +526,24 @@ export function initializeEntriesTab() { renderEntries(); }); }); + + // Add click handlers for site filtering + tagsList.querySelectorAll('.site-filter').forEach(siteElement => { + siteElement.addEventListener('click', (e) => { + e.preventDefault(); + const site = siteElement.dataset.site; + const filterInput = document.getElementById('filterInput'); + + // Toggle site filter + if (filterInput.value.toLowerCase() === `site:${site}`) { + filterInput.value = ''; // Clear filter if clicking active site + } else { + filterInput.value = `site:${site}`; + } + + renderEntries(); + }); + }); } // Modify existing renderEntries function diff --git a/manifest.json b/manifest.json index d85a0b3..3082cb8 100755 --- a/manifest.json +++ b/manifest.json @@ -21,6 +21,12 @@ "host_permissions": [ "" ], + "content_scripts": [ + { + "matches": ["*://*.reddit.com/*"], + "js": ["reddit-content.js"] + 
} + ], "icons": { "16": "16.png", "32": "32.png", @@ -42,8 +48,8 @@ "type": "module" }, "web_accessible_resources": [{ - "resources": ["popup.css", "popup.js"], - "matches": ["*://*\/*"] + "resources": ["popup.css", "popup.js", "site-handlers.js", "reddit-handler.js"], + "matches": ["*://*/*"] }], "commands": { "save-to-archivebox-action": { diff --git a/options.html b/options.html index d753c64..03bca06 100755 --- a/options.html +++ b/options.html @@ -300,38 +300,33 @@
Advanced Users Only: Auto-archive URLs

-
-
Reddit Content Capture
-
- - -
-
- When enabled, the extension will automatically detect and save Reddit posts as you browse them. Posts are captured as they become visible in your viewport. -
- -
- - -
- -
-
Reddit Content Capture Features:
-
    -
  • ✓ Automatic capture of posts visible in your viewport
  • -
  • ✓ Detection of new content as you scroll
  • -
  • ✓ Capturing metadata including subreddit name
  • -
  • ✓ Throttled content capture to prevent performance issues
  • -
  • ✓ Visual indicator when content is captured
  • -
- - The extension uses a mutation observer to detect new posts as they load during infinite scrolling. - -
-
+
+
Content Capture Settings
+
+ + +
+
+ When enabled, the extension will automatically detect and save content from supported sites as you browse them. +
+ +
+ + +
+ + +
+ + +
+ + +
+
diff --git a/options.js b/options.js index 7c9c4e2..f01e9f3 100755 --- a/options.js +++ b/options.js @@ -3,15 +3,20 @@ import { initializeImport } from './import-tab.js'; import { initializePersonasTab } from './personas-tab.js'; import { initializeCookiesTab } from './cookies-tab.js'; import { initializeConfigTab } from './config-tab.js'; +import { initializeAll as initializeAllSiteHandlers } from './site-handlers.js'; // Initialize all tabs when options page loads -document.addEventListener('DOMContentLoaded', () => { +document.addEventListener('DOMContentLoaded', async () => { + // Initialize all tabs initializeEntriesTab(); initializeImport(); initializePersonasTab(); initializeCookiesTab(); initializeConfigTab(); + // Initialize site handlers + await initializeAllSiteHandlers(); + function changeTab() { if (window.location.hash && window.location.hash !== document.querySelector('a.nav-link.active').id) { console.log('Changing tab based on URL hash:', window.location.hash, `a.nav-link${window.location.hash}`, document.querySelector(`a.nav-link${window.location.hash}`)); diff --git a/popup.js b/popup.js index cb5b74f..245e1ee 100755 --- a/popup.js +++ b/popup.js @@ -3,6 +3,8 @@ const IS_IN_POPUP = window.location.href.startsWith('chrome-extension://') && window.location.href.endsWith('/popup.html'); const IS_ON_WEBSITE = !window.location.href.startsWith('chrome-extension://'); +window.handler_stats = null; // Global stats reference + window.popup_element = null; // Global reference to popup element window.hide_timer = null; @@ -65,6 +67,33 @@ async function sendToArchiveBox(url, tags) { return { ok: ok, status: status}; } +async function getSiteHandlerInfo(url) { + try { + if (!url) return null; + + const response = await chrome.runtime.sendMessage({ + type: 'getSiteHandlerForUrl', + url + }); + + return response?.handler || null; + } catch (error) { + console.log('Failed to get site handler info:', error); + return null; + } +} + +async function 
getHandlerStats() { + try { + const response = await chrome.runtime.sendMessage({ type: 'getStats' }); + window.handler_stats = response?.stats || {}; + return window.handler_stats; + } catch (error) { + console.log('Failed to get handler stats:', error); + return {}; + } +} + window.getCurrentEntry = async function() { const { entries = [] } = await chrome.storage.local.get('entries'); let current_entry = entries.find(entry => entry.url === window.location.href); @@ -411,6 +440,7 @@ window.createPopup = async function() { 🏛️

+
Saved locally... @@ -603,6 +633,56 @@ window.createPopup = async function() { selectedIndex = -1; } }); + // Check if this URL has a specific handler and show info + const siteHandlerInfo = await getSiteHandlerInfo(current_entry.url); + const statsContainer = popup.querySelector('.site-handler-info'); + + if (siteHandlerInfo) { + // Update the style for the handler info + const style = doc.createElement('style'); + style.textContent += ` + .site-handler-info { + font-size: 12px; + margin-bottom: 8px; + color: #f0f0f0; + } + + .handler-stats { + display: flex; + gap: 8px; + margin-top: 4px; + } + + .stat-item { + background: rgba(255, 255, 255, 0.1); + padding: 2px 6px; + border-radius: 4px; + font-size: 10px; + } + `; + doc.head.appendChild(style); + + // Show handler info + statsContainer.innerHTML = ` +
This page uses the ${siteHandlerInfo.name} handler for enhanced capture.
+
+ ${siteHandlerInfo.id === 'reddit' ? 'Reddit-specific options available in settings' : ''} +
+ `; + + // Get stats if available + const stats = await getHandlerStats(); + const handlerStats = stats[siteHandlerInfo.id]; + + if (handlerStats) { + const statsRow = statsContainer.querySelector('.handler-stats'); + if (handlerStats.captureCount) { + statsRow.innerHTML += `Captured: ${handlerStats.captureCount}`; + } + } + } else { + statsContainer.style.display = 'none'; + } input.focus(); console.log('+ Showed ArchiveBox popup in iframe'); @@ -657,6 +737,22 @@ window.createPopup = async function() { // Initial resize setTimeout(resizeIframe, 0); + + notifyUrlVisit(current_entry.url); } window.createPopup(); + +// Function to notify background script about URL visit +async function notifyUrlVisit(url) { + if (!url) return; + + try { + await chrome.runtime.sendMessage({ + type: 'urlVisited', + url + }); + } catch (error) { + // Ignore any errors + } +} diff --git a/reddit-content.js b/reddit-content.js new file mode 100644 index 0000000..d4283fd --- /dev/null +++ b/reddit-content.js @@ -0,0 +1,365 @@ +// reddit-content.js +// Content script for detecting Reddit posts in the viewport with improved architecture + +// Configuration +const CONFIG = { + OBSERVATION_THRESHOLD: 0.4, // Post must be 40% visible to trigger capture + ROOT_MARGIN: "100px", // Extend detection area beyond viewport + QUEUE_PROCESS_DELAY: 100, // Delay between processing items in queue + MUTATION_OBSERVER_DELAY: 150, // Delay after DOM changes before finding new posts + MAX_PROCESSED_POSTS: 1000, // Maximum number of processed post IDs to store + DEBUG_MODE: true // Enable debug logging +}; + +// State management +const state = { + observedPosts: new Set(), // Posts we've already seen and processed + postQueue: [], // Queue of posts to process in positional order + isProcessingQueue: false, // Whether we're currently processing the queue + captureCount: 0, // Number of posts captured in this session + isEnabled: false, // Whether capture is enabled + isInitialized: false // Whether we've 
initialized the system +}; + +/** + * Debug logging + */ +function debugLog(...args) { + if (CONFIG.DEBUG_MODE) { + console.log('[ArchiveBox Reddit]', ...args); + } +} + +/** + * Process posts in order from top to bottom of page + */ +function processNextPost() { + if (state.postQueue.length === 0) { + state.isProcessingQueue = false; + return; + } + + state.isProcessingQueue = true; + + // Sort post queue by Y position (top to bottom) + state.postQueue.sort((a, b) => a.position - b.position); + + // Process the topmost post + const postToProcess = state.postQueue.shift(); + capturePost(postToProcess.postElement, postToProcess.postId); + + // Continue processing the queue with a small delay to prevent UI blocking + setTimeout(processNextPost, CONFIG.QUEUE_PROCESS_DELAY); +} + +/** + * Queue a post for capture based on its position in the viewport + */ +function queuePostForCapture(postElement, postId) { + // Get the vertical position of the post + const rect = postElement.getBoundingClientRect(); + const position = rect.top; + + // Add to queue with position data + state.postQueue.push({ + postElement, + postId, + position + }); + + // Start processing queue if not already running + if (!state.isProcessingQueue) { + processNextPost(); + } +} + +/** + * Extract useful information from a post element + */ +function extractPostData(postElement, postId) { + // Extract post details - try different selectors to handle Reddit's different UI versions + const titleElement = postElement.querySelector( + 'h1, h3, [data-testid="post-title"], [data-click-id="body"] h2, a.title' + ); + + const linkElement = postElement.querySelector( + 'a.title, [data-click-id="body"], a[data-click-id="comments"], [data-testid="post-title"] a' + ); + + if (!titleElement) { + debugLog('Could not find title element in post:', postId); + return null; + } + + // Get title + const title = titleElement.textContent.trim(); + + // Get permalink/URL + let url = ''; + if (linkElement && linkElement.href) 
{ + url = linkElement.href; + } else { + // Try to construct URL from post ID if it matches Reddit's post ID format + const redditId = postId.replace('t3_', ''); + if (redditId.length >= 6) { + // Try to extract subreddit + const subredditElement = postElement.querySelector('a[href^="/r/"]'); + const subredditName = subredditElement ? subredditElement.textContent.replace('r/', '') : ''; + + if (subredditName) { + url = `https://www.reddit.com/r/${subredditName}/comments/${redditId}/`; + } else { + url = `https://www.reddit.com/comments/${redditId}/`; + } + } + } + + if (!title || !url) { + debugLog('Insufficient data for post, skipping'); + return null; + } + + // Get subreddit + const subredditElement = postElement.querySelector('a[href^="/r/"]'); + const subreddit = subredditElement ? subredditElement.textContent.replace('r/', '') : ''; + + return { + url, + title, + subreddit + }; +} + +/** + * Capture post data and send to background script + */ +function capturePost(postElement, postId) { + // Only capture the post if we haven't already processed it + if (state.observedPosts.has(postId)) return; + + // Mark as processed and manage the max size of observedPosts + state.observedPosts.add(postId); + if (state.observedPosts.size > CONFIG.MAX_PROCESSED_POSTS) { + // Remove oldest entries (approximation since Sets don't guarantee order) + const excess = state.observedPosts.size - CONFIG.MAX_PROCESSED_POSTS; + const entries = Array.from(state.observedPosts).slice(0, excess); + entries.forEach(entry => state.observedPosts.delete(entry)); + debugLog(`Pruned ${excess} old post IDs from observed set`); + } + + // Extract post data + const postData = extractPostData(postElement, postId); + if (!postData) return; + + // Increment capture count + state.captureCount++; + + // Send to background script with high priority + chrome.runtime.sendMessage({ + type: 'capture', + entry: { + url: postData.url, + title: postData.title, + tags: ['reddit', postData.subreddit, 
'viewport-captured'].filter(Boolean), + timestamp: new Date().toISOString(), + priority: 'high' // Mark as high priority + } + }); + + // Add visual indicator to the post + addVisualIndicator(postElement); + + // Show status immediately + chrome.runtime.sendMessage({ + type: 'showStatus', + message: `Captured: ${postData.title.substring(0, 40)}...`, + count: state.captureCount, + immediate: true // Request immediate display + }); + + debugLog('Captured post in viewport:', postData.title, postData.url); +} + +/** + * Add a small visual indicator to show the post has been captured + */ +function addVisualIndicator(postElement) { + // Create indicator if it doesn't exist + if (!postElement.querySelector('.archivebox-captured-indicator')) { + const indicator = document.createElement('div'); + indicator.className = 'archivebox-captured-indicator'; + indicator.style.cssText = ` + position: absolute; + top: 0; + right: 0; + background: rgba(0, 128, 0, 0.6); + color: white; + font-size: 10px; + padding: 2px 5px; + border-radius: 0 0 0 3px; + z-index: 9999; + `; + indicator.textContent = '✓ Archived'; + + // Make sure the post has a relative position for absolute positioning to work + if (getComputedStyle(postElement).position === 'static') { + postElement.style.position = 'relative'; + } + + postElement.appendChild(indicator); + } +} + +/** + * Set up intersection observer to detect posts as they become visible + */ +function setupObserver() { + debugLog('Setting up viewport observer for Reddit'); + + const observer = new IntersectionObserver((entries) => { + entries.forEach(entry => { + if (entry.isIntersecting && entry.intersectionRatio >= CONFIG.OBSERVATION_THRESHOLD) { + const postElement = entry.target; + + // Extract post ID to avoid processing the same post multiple times + const postId = postElement.id || + postElement.getAttribute('data-post-id') || + postElement.getAttribute('data-fullname') || + postElement.getAttribute('id'); + + if (!postId) return; + + // 
Queue for processing in top-to-bottom order + queuePostForCapture(postElement, postId); + } + }); + }, { + threshold: CONFIG.OBSERVATION_THRESHOLD, + rootMargin: CONFIG.ROOT_MARGIN + }); + + // Find and observe posts + function findAndObservePosts() { + // Attempt to find posts using different selectors for different Reddit versions + const postSelectors = [ + // Current "new" Reddit redesign + 'div[data-testid="post-container"]', + '.Post', + '[data-test-id="post-content"]', + + // Old Reddit design + '.thing[data-author]', + + // Mobile Reddit + 'article[data-testid="post"]', + + // Generic fallbacks that might work across versions + '[data-click-id="body"]', + '.scrollerItem' + ]; + + const postElements = document.querySelectorAll(postSelectors.join(', ')); + + if (postElements.length > 0) { + debugLog(`Found ${postElements.length} Reddit posts to observe`); + postElements.forEach(post => observer.observe(post)); + } + } + + // Initial find + findAndObservePosts(); + + // Set up mutation observer to detect new posts loaded during scrolling + const mutationObserver = new MutationObserver((mutations) => { + let shouldFindPosts = false; + + for (const mutation of mutations) { + if (mutation.addedNodes.length > 0) { + shouldFindPosts = true; + break; + } + } + + if (shouldFindPosts) { + // Wait a small amount of time for any post loading to complete + // This helps reduce redundant processing during rapid DOM changes + clearTimeout(state.mutationTimeout); + state.mutationTimeout = setTimeout(findAndObservePosts, CONFIG.MUTATION_OBSERVER_DELAY); + } + }); + + // Observe changes to the body and any feed containers + const feedContainers = [ + document.body, + ...document.querySelectorAll('.ListingLayout-outerContainer, .browse-container, #siteTable') + ]; + + feedContainers.forEach(container => { + if (container) { + mutationObserver.observe(container, { childList: true, subtree: true }); + } + }); + + return { + disconnect: () => { + observer.disconnect(); + 
mutationObserver.disconnect(); + debugLog('Observers disconnected'); + } + }; +} + +/** + * Initialize the content script + */ +function initialize() { + if (state.isInitialized) return; + + // Only run on Reddit domains + if (!window.location.hostname.includes('reddit.com')) { + return; + } + + debugLog('Reddit page detected, checking if capture is enabled'); + + // Check if capture is enabled in the extension settings + chrome.runtime.sendMessage({ type: 'getEnableStatus' }, function(response) { + if (response && response.enableScrollCapture) { + debugLog('Reddit capture enabled, setting up viewport detection'); + state.isEnabled = true; + state.observers = setupObserver(); + } else { + debugLog('Reddit capture is disabled in settings'); + state.isEnabled = false; + } + + state.isInitialized = true; + }); + + // Listen for status changes + chrome.runtime.onMessage.addListener((message) => { + if (message.type === 'captureStatusChanged') { + if (message.enabled && !state.isEnabled) { + // Capture was enabled + debugLog('Capture was enabled, setting up observers'); + state.isEnabled = true; + state.observers = setupObserver(); + } else if (!message.enabled && state.isEnabled) { + // Capture was disabled + debugLog('Capture was disabled, shutting down observers'); + state.isEnabled = false; + if (state.observers) { + state.observers.disconnect(); + state.observers = null; + } + } + } + }); +} + +// Handle initialization properly +if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', initialize); +} else { + initialize(); +} diff --git a/reddit-handler.js b/reddit-handler.js new file mode 100644 index 0000000..620b8c5 --- /dev/null +++ b/reddit-handler.js @@ -0,0 +1,593 @@ +// reddit-handler.js +// Manages all Reddit-specific capture functionality + +// Configuration +const CONFIG = { + CAPTURE_DELAY: 1000, // Delay between captures in ms + VIEWPORT_CAPTURE_DELAY: 100, // Quicker for visible posts + MAX_PROCESSED_URLS: 1000, // 
Maximum number of URLs to keep in memory + DEBUG_MODE: true, + BATCH_SIZE: 10, // Number of entries to batch save + STORAGE_KEY: 'reddit_processed_urls' // Key for storing processed URLs +}; + +// State management +let processedUrls = new Set(); +let captureCount = 0; +let isInitialized = false; + +// Queues with priority +const captureQueue = { + high: [], // Viewport-visible posts + normal: [], // Background discovered posts + processing: false +}; + +/** + * Debug logging + */ +function debugLog(...args) { + if (CONFIG.DEBUG_MODE) { + console.log('[Reddit Handler]', ...args); + } +} + +/** + * Initialize the Reddit handler + */ +export async function initialize() { + if (isInitialized) return; + + debugLog('Initializing Reddit handler'); + + // Load previously processed URLs from storage + const storage = await chrome.storage.local.get(CONFIG.STORAGE_KEY); + if (storage[CONFIG.STORAGE_KEY]) { + try { + const storedUrls = JSON.parse(storage[CONFIG.STORAGE_KEY]); + processedUrls = new Set(storedUrls); + debugLog(`Loaded ${processedUrls.size} previously processed URLs`); + } catch (e) { + debugLog('Error parsing stored URLs:', e); + processedUrls = new Set(); + } + } + + // Reset capture count + captureCount = 0; + + // Setup listeners + setupRedditListeners(); + + isInitialized = true; + debugLog('Reddit handler initialized'); + + // Start queue processor + processQueue(); +} + +/** + * Setup listeners for Reddit-specific functionality + */ +function setupRedditListeners() { + // Listen for navigation to Reddit post pages + chrome.webRequest.onCompleted.addListener( + handleRedditNavigation, + { urls: ["*://*.reddit.com/*"] }, + [] + ); + + // Listen for POST requests that might contain Reddit data + chrome.webRequest.onBeforeRequest.addListener( + handleRedditApiRequest, + { urls: ["*://*.reddit.com/*"] }, + ["requestBody"] + ); +} + +/** + * Handle navigation to a Reddit post + */ +async function handleRedditNavigation(details) { + // Only interested in document 
navigation + if (details.type !== 'main_frame' && details.type !== 'sub_frame') { + return; + } + + // Check if URL contains Reddit and is a post + if (!details.url.includes('reddit.com') || !isRedditPostUrl(details.url)) { + return; + } + + // Get settings to see if we should capture + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + return; + } + + debugLog('Detected navigation to Reddit post:', details.url); + + // Inject content script for viewport detection + injectContentScript(details.tabId); + + // Wait for page to load title + setTimeout(async () => { + try { + // Get tab info + const tab = await chrome.tabs.get(details.tabId).catch(() => null); + if (!tab) return; + + // Process the URL + processRedditNavigationUrl(details.url, tab.title, details.tabId); + } catch (e) { + debugLog('Error processing Reddit navigation:', e); + } + }, 1000); +} + +/** + * Handle Reddit API requests that might contain post data + */ +async function handleRedditApiRequest(details) { + if (details.method !== "POST") return; + + // Check for relevant endpoints + const isRedditAPIEndpoint = + details.url.includes('/svc/shreddit/events') || + details.url.includes('/svc/shreddit/graphql') || + details.url.includes('/api/'); + + if (!isRedditAPIEndpoint) return; + + // Check if capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + return; + } + + try { + // Try to parse the request body if available + if (details.requestBody && details.requestBody.raw) { + for (const raw of details.requestBody.raw) { + if (raw.bytes) { + const decoder = new TextDecoder(); + const text = decoder.decode(raw.bytes); + + // Look for post data patterns + if (text.includes('"post":') && text.includes('"title":')) { + extractPostsFromJson(text, details.tabId); + } else if (text.includes('"subreddit_name":') && text.includes('"title":')) { + 
extractPostsFromJson(text, details.tabId); + } + } + } + } + } catch (e) { + debugLog('Error processing request body:', e); + } +} + +/** + * Extract posts from JSON data + */ +function extractPostsFromJson(jsonText, tabId) { + try { + // For debugging, log a sample of what we're trying to parse + debugLog('Parsing JSON data sample:', jsonText.substring(0, 200)); + + // Try to parse the JSON + let data = null; + try { + data = JSON.parse(jsonText); + } catch (e) { + debugLog('Failed to parse JSON:', e.message); + return; + } + + // Check for Reddit's specific structure with "info" array + if (data && data.info && Array.isArray(data.info)) { + debugLog('Found Reddit info array with', data.info.length, 'items'); + + // Process each item in the info array + data.info.forEach(item => { + // Check if this item has a post object + if (item && item.post) { + // Extract the post data + const post = item.post; + + // Check for title field + if (post.title) { + debugLog('Found post with title:', post.title); + + // Create URL + let url = ''; + if (post.url && post.url.startsWith('/')) { + url = 'https://www.reddit.com' + post.url; + } else if (post.url) { + url = post.url; + } else if (post.id && post.id.startsWith('t3_')) { + // Construct URL from post ID + const postId = post.id.substring(3); + + // Include subreddit if available + if (post.subreddit_name) { + const subreddit = post.subreddit_name.replace('r/', ''); + url = `https://www.reddit.com/r/${subreddit}/comments/${postId}`; + } else { + url = `https://www.reddit.com/comments/${postId}`; + } + } + + if (url) { + // Extract subreddit + let subreddit = ''; + if (post.subreddit_name) { + subreddit = post.subreddit_name.replace('r/', ''); + } + + // Create post data object + const postData = { + url: url, + title: post.title, + subreddit: subreddit, + timestamp: new Date().toISOString() + }; + + // Queue the post for processing with normal priority + queueForCapture(postData, tabId, 'normal'); + } + } + } + }); + } + } 
catch (e) { + debugLog('Error processing JSON data:', e); + } +} + +/** + * Check if URL is a Reddit post + */ +function isRedditPostUrl(url) { + try { + if (!url.includes('reddit.com')) return false; + + const parsedUrl = new URL(url); + return parsedUrl.pathname.includes('/comments/'); + } catch (e) { + return false; + } +} + +/** + * Process a Reddit navigation URL + */ +function processRedditNavigationUrl(url, pageTitle, tabId) { + try { + const parsedUrl = new URL(url); + const pathParts = parsedUrl.pathname.split('/'); + + // Check for /comments/ format + if (pathParts.includes('comments')) { + const commentsIndex = pathParts.indexOf('comments'); + + // Need at least comment ID + if (commentsIndex + 1 < pathParts.length) { + // Get subreddit if present + let subreddit = ''; + if (pathParts[1] === 'r' && pathParts[2]) { + subreddit = pathParts[2]; + } + + // Clean up title + let title = pageTitle || ''; + if (title.includes(' - Reddit')) { + title = title.split(' - Reddit')[0].trim(); + } + + // Create post data + const postData = { + url: url, + title: title || 'Reddit Post', + subreddit: subreddit, + timestamp: new Date().toISOString() + }; + + // Queue for processing with normal priority + queueForCapture(postData, tabId, 'normal'); + } + } + } catch (e) { + debugLog('Error processing Reddit URL:', e); + } +} + +/** + * Queue a post for capture with priority + */ +function queueForCapture(postData, tabId, priority = 'normal') { + if (!postData || !postData.url || !postData.title) { + debugLog('Invalid post data, skipping:', postData); + return; + } + + // Normalize URL to avoid duplicates + const normalizedUrl = normalizeRedditUrl(postData.url); + + // Skip if already processed + if (processedUrls.has(normalizedUrl)) { + debugLog('Skipping already processed URL:', normalizedUrl); + return; + } + + debugLog(`Queueing Reddit post with ${priority} priority:`, postData.title); + + // Add to appropriate queue + captureQueue[priority].push({ + data: postData, + 
tabId: tabId, + queuedAt: Date.now() + }); + + // Start processing if not already running + if (!captureQueue.processing) { + processQueue(); + } +} + +/** + * Process the capture queue + */ +async function processQueue() { + if (captureQueue.high.length === 0 && captureQueue.normal.length === 0) { + captureQueue.processing = false; + debugLog('Queue empty, stopping processor'); + return; + } + + captureQueue.processing = true; + + // Process high priority queue first + let item; + let delay; + + if (captureQueue.high.length > 0) { + item = captureQueue.high.shift(); + delay = CONFIG.VIEWPORT_CAPTURE_DELAY; + } else { + item = captureQueue.normal.shift(); + delay = CONFIG.CAPTURE_DELAY; + } + + // Get age of item in queue + const queueAge = Date.now() - item.queuedAt; + debugLog(`Processing post from queue (age: ${queueAge}ms):`, item.data.title); + + // Normalize URL for deduplication + const normalizedUrl = normalizeRedditUrl(item.data.url); + + // Mark as processed + addToProcessedUrls(normalizedUrl); + captureCount++; + + // Create entry object + const entry = { + url: item.data.url, + title: item.data.title, + timestamp: item.data.timestamp, + tags: ['reddit', item.data.subreddit].filter(Boolean) + }; + + // Process the entry + await saveEntry(entry); + + // Show status in tab - check if tab still exists first + try { + const tab = await chrome.tabs.get(item.tabId); + if (tab) { + chrome.runtime.sendMessage({ + type: 'showStatus', + message: `${entry.title.substring(0, 40)}...`, + count: captureCount, + tabId: item.tabId + }); + } + } catch (err) { + debugLog(`Tab ${item.tabId} doesn't exist anymore, skipping status update`); + } + + // Schedule next item with delay + setTimeout(processQueue, delay); +} + +/** + * Add URL to processed URLs and manage the size limit + */ +function addToProcessedUrls(url) { + processedUrls.add(url); + + // If we've exceeded the limit, remove oldest items + // This is approximate since Sets don't guarantee order + if 
(processedUrls.size > CONFIG.MAX_PROCESSED_URLS) { + const urlsArray = Array.from(processedUrls); + const toRemove = urlsArray.slice(0, urlsArray.length - CONFIG.MAX_PROCESSED_URLS); + toRemove.forEach(u => processedUrls.delete(u)); + debugLog(`Removed ${toRemove.length} old URLs from processed set`); + } + + // Periodically save processed URLs to storage + if (processedUrls.size % 50 === 0) { + persistProcessedUrls(); + } +} + +/** + * Save processed URLs to storage + */ +async function persistProcessedUrls() { + const urlsArray = Array.from(processedUrls); + await chrome.storage.local.set({ + [CONFIG.STORAGE_KEY]: JSON.stringify(urlsArray) + }); + debugLog(`Saved ${urlsArray.length} processed URLs to storage`); +} + +/** + * Normalize Reddit URL to avoid duplicates + */ +function normalizeRedditUrl(url) { + try { + const parsedUrl = new URL(url); + + // Extract essential parts (subreddit & post ID) + const parts = parsedUrl.pathname.split('/'); + const commentsIndex = parts.indexOf('comments'); + + if (commentsIndex > 0 && commentsIndex + 1 < parts.length) { + // Get post ID + const postId = parts[commentsIndex + 1]; + + // Get subreddit if available + let subreddit = ''; + if (parts[1] === 'r' && parts[2]) { + subreddit = parts[2]; + } + + // Create canonical URL + if (subreddit) { + return `${parsedUrl.origin}/r/${subreddit}/comments/${postId}`; + } else { + return `${parsedUrl.origin}/comments/${postId}`; + } + } + + // Fallback to removing query params and fragments + return `${parsedUrl.origin}${parsedUrl.pathname}`; + } catch (e) { + debugLog('Error normalizing URL:', e); + return url; + } +} + +/** + * Save entry to local storage + * Eventually used for batch saving + */ +async function saveEntry(entry) { + try { + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? 
+ scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Create the full entry object + const fullEntry = { + id: crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', 'reddit', ...customTags, ...(entry.tags || [])], + title: entry.title || 'Reddit Post', + notes: `Auto-captured from Reddit: ${entry.url}` + }; + + // Save to storage + const { entries = [] } = await chrome.storage.local.get('entries'); + + // Normalize URLs for more accurate comparison + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + return url; + } + }; + + // Check if this URL already exists in our entries + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (!existingEntry) { + entries.push(fullEntry); + await chrome.storage.local.set({ entries }); + debugLog('Entry saved to local storage:', fullEntry.title); + } else { + debugLog('URL already exists in entries, skipping:', entry.url); + } + } catch (e) { + debugLog('Error saving entry:', e); + } +} + +/** + * Inject content script for viewport detection + */ +export async function injectContentScript(tabId) { + try { + const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']); + if (!enableScrollCapture) { + debugLog('Reddit capture is disabled in settings, not injecting content script'); + return; + } + + debugLog('Injecting Reddit content script into tab:', tabId); + + await chrome.scripting.executeScript({ + target: { tabId: tabId }, + files: ['reddit-content.js'] + }); + + debugLog('Content script injected successfully'); + } catch (err) { + debugLog('Error injecting content script:', err.message); + } +} + +/** + * Handle high priority capture request from content script + */ +export function captureHighPriority(entry, tabId) { 
+ debugLog('Received high priority capture request from content script:', entry.url); + + // Create post data object + const postData = { + url: entry.url, + title: entry.title, + subreddit: entry.tags.find(tag => tag !== 'reddit' && tag !== 'viewport-captured'), + timestamp: entry.timestamp + }; + + // Queue with high priority + queueForCapture(postData, tabId, 'high'); +} + +/** + * Clear all queues and reset + */ +export function reset() { + captureQueue.high = []; + captureQueue.normal = []; + captureQueue.processing = false; + captureCount = 0; + debugLog('Reddit handler reset'); +} + +/** + * Public method to check if we should capture the current URL + */ +export function shouldCaptureUrl(url) { + if (!url.includes('reddit.com')) return false; + return isRedditPostUrl(url); +} + +/** + * Get stats about the Reddit handler + */ +export function getStats() { + return { + captureCount, + processedUrlsCount: processedUrls.size, + highPriorityQueueLength: captureQueue.high.length, + normalPriorityQueueLength: captureQueue.normal.length + }; +} diff --git a/site-handlers.js b/site-handlers.js new file mode 100644 index 0000000..4385ebd --- /dev/null +++ b/site-handlers.js @@ -0,0 +1,254 @@ +// site-handlers.js +// Registry for all site-specific handlers + +import * as RedditHandler from './reddit-handler.js'; + + +// Debug configuration +const DEBUG = true; + +// Debug logging +function debugLog(...args) { + if (DEBUG) { + console.log('[Site Handlers]', ...args); + } +} + +// Registry of all available site handlers +const handlers = { + // Reddit handler + reddit: { + name: 'Reddit', + module: RedditHandler, + domains: ['reddit.com'], + description: 'Automatically captures Reddit posts while browsing', + version: '1.0.0', + author: 'ArchiveBox' + } + + // Add more site handlers here following the same format + // For example: + /* + twitter: { + name: 'Twitter', + module: TwitterHandler, + domains: ['twitter.com', 'x.com'], + description: 'Captures tweets and 
threads', + version: '1.0.0', + author: 'ArchiveBox' + } + */ +}; + +/** + * Initialize all site handlers + */ +export async function initializeAll() { + debugLog('Initializing all site handlers'); + + // Check if site capture is enabled + const { enableScrollCapture } = await chrome.storage.local.get('enableScrollCapture'); + + if (!enableScrollCapture) { + debugLog('Site capture is disabled, skipping initialization'); + return; + } + + // Initialize each handler + for (const [id, handler] of Object.entries(handlers)) { + if (handler.module && typeof handler.module.initialize === 'function') { + try { + debugLog(`Initializing ${handler.name} handler`); + await handler.module.initialize(); + } catch (error) { + console.error(`Error initializing ${handler.name} handler:`, error); + } + } + } + + debugLog('All site handlers initialized'); +} + +/** + * Get a specific handler by ID + */ +export function getHandler(handlerId) { + return handlers[handlerId]?.module; +} + +/** + * Find a handler for a specific URL + */ +export function findHandlerForUrl(url) { + try { + const hostname = new URL(url).hostname.toLowerCase(); + + for (const [id, handler] of Object.entries(handlers)) { + if (handler.domains.some(domain => hostname.includes(domain))) { + return { id, handler: handler.module }; + } + } + } catch (error) { + console.error('Error finding handler for URL:', error); + } + + return null; +} + +/** + * Handle capture request from content script + */ +export async function handleCaptureRequest(entry, tabId) { + const handlerResult = findHandlerForUrl(entry.url); + + if (handlerResult) { + debugLog(`Using ${handlerResult.id} handler for ${entry.url}`); + + if (entry.priority === 'high' && typeof handlerResult.handler.captureHighPriority === 'function') { + return handlerResult.handler.captureHighPriority(entry, tabId); + } else if (typeof handlerResult.handler.captureNormal === 'function') { + return handlerResult.handler.captureNormal(entry, tabId); + } + } + + // No 
specific handler found, use generic method + debugLog(`No specific handler for ${entry.url}, using generic method`); + return saveGenericEntry(entry); +} + +/** + * Save a generic entry + */ +async function saveGenericEntry(entry) { + try { + if (!entry || !entry.url) { + return { success: false, reason: 'Invalid entry' }; + } + + // Get current entries + const { entries = [] } = await chrome.storage.local.get('entries'); + + // Check for duplicates + const normalizeUrl = (url) => { + try { + const normalized = new URL(url); + return normalized.origin + normalized.pathname.replace(/\/$/, ''); + } catch (e) { + return url; + } + }; + + const normalizedEntryUrl = normalizeUrl(entry.url); + const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl); + + if (existingEntry) { + return { success: false, reason: 'URL already exists' }; + } + + // Add custom tags if configured + const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']); + const customTags = scrollCaptureTags ? 
+ scrollCaptureTags.split(',').map(tag => tag.trim()) : []; + + // Extract site tags + const siteTags = getSiteTags(entry.url); + + // Create the full entry object + const fullEntry = { + id: entry.id || crypto.randomUUID(), + url: entry.url, + timestamp: entry.timestamp || new Date().toISOString(), + tags: ['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])], + title: entry.title || 'Captured content', + notes: entry.notes || `Auto-captured content: ${entry.url}`, + favicon: entry.favicon + }; + + // Add to entries + entries.push(fullEntry); + + // Save entries + await chrome.storage.local.set({ entries }); + + return { success: true }; + } catch (e) { + return { success: false, reason: e.message }; + } +} + +/** + * Extract site name for tagging + */ +function getSiteTags(url) { + try { + const hostname = new URL(url).hostname; + const domain = hostname + .replace('www.', '') + .replace(/\.(com|org|net|io|gov|edu)$/, ''); + return [domain]; + } catch (e) { + return []; + } +} + +/** + * Check a specific URL against all site handlers + */ +export function shouldCaptureUrl(url) { + try { + const handlerResult = findHandlerForUrl(url); + + if (handlerResult && handlerResult.handler.shouldCaptureUrl) { + return handlerResult.handler.shouldCaptureUrl(url); + } + } catch (error) { + console.error('Error checking if URL should be captured:', error); + } + + return false; +} + +/** + * Inject appropriate content script for a URL + */ +export async function injectContentScriptForUrl(url, tabId) { + try { + const handlerResult = findHandlerForUrl(url); + + if (handlerResult && handlerResult.handler.injectContentScript) { + await handlerResult.handler.injectContentScript(tabId); + return true; + } + } catch (error) { + console.error('Error injecting content script:', error); + } + + return false; +} + +/** + * Get stats from all handlers + */ +export function getAllStats() { + const stats = {}; + + for (const [id, handler] of Object.entries(handlers)) { + if 
(handler.module && typeof handler.module.getStats === 'function') { + stats[id] = handler.module.getStats(); + } + } + + return stats; +} + +/** + * Get all handlers + * Returns the complete registry of site handlers with their metadata + * @returns {Object} Object containing all registered handlers with their metadata + */ +export function getAllHandlers() { + return handlers; +} + +// Export all handlers for direct access +export const Reddit = RedditHandler; diff --git a/utils.js b/utils.js index e7dcc35..960ecdd 100755 --- a/utils.js +++ b/utils.js @@ -10,6 +10,20 @@ export async function getArchiveBoxServerUrl() { export function filterEntries(entries, filterText) { if (!filterText) return entries; + // Handle site: prefix + if (filterText.toLowerCase().startsWith('site:')) { + const siteId = filterText.slice(5).toLowerCase().trim(); + const handlers = getAllHandlers(); + const handler = handlers[siteId]; + + if (handler) { + return entries.filter(entry => + handler.domains.some(domain => entry.url.includes(domain)) + ); + } + } + + // Regular search const searchTerms = filterText.toLowerCase().split(' '); return entries.filter(entry => { const searchableText = [ @@ -207,3 +221,124 @@ export async function syncToArchiveBox(entry) { }; } } + +/** + * Check if a URL should be captured automatically based on regex patterns + * @param {string} url - The URL to check + * @returns {boolean} - Whether the URL should be captured + */ +export async function shouldAutoCapture(url) { + if (!url) return false; + + try { + const { match_urls, exclude_urls } = await chrome.storage.local.get(['match_urls', 'exclude_urls']); + + // If no match pattern is defined, don't capture + if (!match_urls) return false; + + // Create RegExp objects + const matchPattern = new RegExp(match_urls); + const excludePattern = exclude_urls ? 
new RegExp(exclude_urls) : null; + + // Check if URL matches the inclusion pattern and doesn't match the exclusion pattern + if (matchPattern.test(url)) { + return !excludePattern || !excludePattern.test(url); + } + + return false; + } catch (e) { + console.error('Error checking if URL should be captured:', e); + return false; + } +} + +/** + * Get all available site handlers + * @returns {Promise} - Array of site handler information + */ +export async function getAvailableSiteHandlers() { + try { + return await chrome.runtime.sendMessage({ type: 'getSiteHandlers' }); + } catch (e) { + console.error('Error getting site handlers:', e); + return []; + } +} + +/** + * Get capture statistics + * @returns {Promise} - Capture statistics by site + */ +export async function getCaptureStats() { + try { + const response = await chrome.runtime.sendMessage({ type: 'getStats' }); + return response?.stats || {}; + } catch (e) { + console.error('Error getting capture stats:', e); + return {}; + } +} + +/** + * Limit the size of a collection with a max size + * @param {Array|Set} collection - The collection to limit + * @param {number} maxSize - Maximum size allowed + * @returns {Array|Set} - The limited collection + */ +export function limitCollectionSize(collection, maxSize) { + if (!collection || typeof maxSize !== 'number' || maxSize <= 0) { + return collection; + } + + if (collection instanceof Set) { + if (collection.size <= maxSize) return collection; + + const newSet = new Set(); + const entries = [...collection].slice(-maxSize); // Keep newest items (at the end) + for (const entry of entries) { + newSet.add(entry); + } + return newSet; + } + + if (Array.isArray(collection)) { + if (collection.length <= maxSize) return collection; + return collection.slice(-maxSize); // Keep newest items (at the end) + } + + return collection; +} + +/** + * Get current capture configuration + * @returns {Promise} - Configuration object + */ +export async function getCaptureConfig() { + 
return await chrome.storage.local.get([ + 'enableScrollCapture', + 'scrollCaptureTags', + 'redditCaptureConfig' + ]); +} + +/** + * Save capture configuration + * @param {Object} config - Configuration to save + * @returns {Promise} + */ +export async function saveCaptureConfig(config) { + await chrome.storage.local.set(config); + + // Notify tabs about configuration changes + const tabs = await chrome.tabs.query({}); + for (const tab of tabs) { + try { + chrome.tabs.sendMessage(tab.id, { + type: 'captureConfigChanged', + config + }).catch(() => {/* Ignore errors for tabs that don't have content scripts */}); + } catch (e) { + // Ignore errors for tabs that don't have content scripts + } + } +}