diff --git a/background.js b/background.js
index 0c06e9e..1055a88 100755
--- a/background.js
+++ b/background.js
@@ -1,16 +1,256 @@
// background.js
import { addToArchiveBox } from "./utils.js";
+import * as RedditHandler from "./reddit-handler.js";
-chrome.runtime.onMessage.addListener(async (message) => {
- const options_url = chrome.runtime.getURL('options.html') + `?search=${message.id}`;
- console.log('i ArchiveBox Collector showing options.html', options_url);
- if (message.action === 'openOptionsPage') {
- await chrome.tabs.create({ url: options_url });
+// Debug configuration: gates every debugLog() call in this file
+const DEBUG_MODE = true; // Set to true to see debugging info; when false debugLog() prints nothing
+
+// Configuration shared by the capture helpers in this file
+const CONFIG = {
+ MAX_ENTRIES: 10000, // Maximum number of entries to store locally; saveEntry() drops the oldest ones beyond this
+ STATUS_DISPLAY_TIME: 3000 // Time in ms to show status indicators
+};
+
+/**
+ * Print a prefixed diagnostic line to the console, but only while
+ * DEBUG_MODE is switched on.
+ * @param {...*} args - Values forwarded to console.log after the prefix
+ */
+function debugLog(...args) {
+  if (!DEBUG_MODE) return;
+  console.log('[ArchiveBox Debug]', ...args);
+}
+
+// State management - registry of site handlers, keyed by site id; each value is the handler module for that site
+const siteHandlers = {
+ reddit: RedditHandler
+};
+
+// Content capture flag: in-memory copy of the `enableScrollCapture` value kept in chrome.storage.local (false until it has been loaded)
+let captureEnabled = false;
+
+/**
+ * Bootstrap the background script: load the capture switch from storage,
+ * start every registered site handler when capture is on, and inject the
+ * content script into already-open tabs that a handler claims.
+ */
+async function initialize() {
+  debugLog('Initializing background script');
+
+  // Mirror the persisted capture switch into module state
+  const stored = await chrome.storage.local.get('enableScrollCapture');
+  captureEnabled = !!stored.enableScrollCapture;
+
+  // Start every handler that exposes an initialize() hook
+  if (captureEnabled) {
+    debugLog('Content capture is enabled, initializing handlers');
+    for (const handler of Object.values(siteHandlers)) {
+      if (typeof handler.initialize === 'function') {
+        handler.initialize();
+      }
+    }
+  }
+
+  // Scan the tabs that were already open; the callback runs after this function has returned
+  chrome.tabs.query({}, (tabs) => {
+    if (!captureEnabled) return;
+    debugLog(`Found ${tabs.length} existing tabs, checking for supported sites`);
+
+    for (const tab of tabs) {
+      if (!tab.url) continue;
+      for (const [site, handler] of Object.entries(siteHandlers)) {
+        if (!handler.shouldCaptureUrl || !handler.shouldCaptureUrl(tab.url)) continue;
+        debugLog(`Found existing ${site} tab:`, tab.url);
+        if (handler.injectContentScript) {
+          handler.injectContentScript(tab.id);
+        }
+      }
}
});
+
+  debugLog('Background script initialized');
+}
+
+/**
+ * Listens for messages from content scripts and popup.
+ *
+ * Dispatches on `message.action` / `message.type`. The return value tells the
+ * browser whether `sendResponse` may still be called after the listener has
+ * returned: branches that answer asynchronously return `true`; messages that
+ * were answered synchronously, or that get no answer at all, end with `false`
+ * so the sender is not left waiting on an open channel.
+ *
+ * @param {Object} message - Request payload; `type` or `action` selects the branch
+ * @param {Object} sender - Sender details; `sender.tab.id` is used when present
+ * @param {Function} sendResponse - Callback used to answer the sender
+ * @returns {boolean} true when the response will be delivered asynchronously
+ */
+chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
+  debugLog('Message received:', message.type || message.action);
+
+  // Handle opening options page (no response is sent for this action)
+  if (message.action === 'openOptionsPage') {
+    // Encode the id so characters such as & or # cannot break the query string
+    const options_url = chrome.runtime.getURL('options.html') + `?search=${encodeURIComponent(message.id)}`;
+    debugLog('Opening options page:', options_url);
+    chrome.tabs.create({ url: options_url });
+  }
+
+  // Handle archivebox_add
+  if (message.type === 'archivebox_add') {
+    debugLog('ArchiveBox add request');
+    addToArchiveBox(message.body, sendResponse, sendResponse);
+    return true; // Keep the message channel open for the async response
+  }
+
+  // Handle content capture
+  if (message.type === 'capture') {
+    const entry = message.entry;
+
+    // Reject malformed requests up front instead of throwing inside the listener
+    if (!entry || typeof entry.url !== 'string') {
+      debugLog('Capture request without a usable entry, ignoring');
+      sendResponse({ success: false, reason: 'Invalid entry' });
+      return false;
+    }
+
+    debugLog('Capture request received:', entry.url);
+
+    // Re-read the switch from storage: the module-level flag is only refreshed
+    // by initialize(), the tabs.onUpdated listener and captureConfigChanged, so
+    // it can be stale when this message is the first event this script sees
+    // (for example after the background context has been restarted).
+    chrome.storage.local.get('enableScrollCapture').then(({ enableScrollCapture }) => {
+      captureEnabled = !!enableScrollCapture;
+
+      if (!captureEnabled) {
+        debugLog('Content capture is disabled, ignoring capture request');
+        sendResponse({ success: false, reason: 'Capture disabled' });
+        return;
+      }
+
+      // Determine site handler based on URL or tags
+      const tags = entry.tags ?? [];
+      const tabId = sender.tab?.id;
+
+      if (tags.includes('reddit') || entry.url.includes('reddit.com')) {
+        if (entry.priority === 'high') {
+          // Use high priority capture for viewport posts
+          RedditHandler.captureHighPriority(entry, tabId);
+        } else {
+          // Let reddit handler decide what to do
+          RedditHandler.queueForCapture(entry, tabId, 'normal');
+        }
+      } else {
+        // Generic handling for other sites
+        saveEntry(entry);
+      }
+
+      sendResponse({ success: true });
+    }).catch((error) => {
+      console.error('Error handling capture request:', error);
+      sendResponse({ success: false, reason: error.message });
+    });
+
+    return true; // Response is sent once the storage read has completed
+  }
+
+  // Enable status requests
+  if (message.type === 'getEnableStatus') {
+    chrome.storage.local.get(['enableScrollCapture'], (result) => {
+      sendResponse({ enableScrollCapture: !!result.enableScrollCapture });
+    });
+    return true; // Keep the message channel open for async response
+  }
+
+  // Show status notification in tabs
+  if (message.type === 'showStatus') {
+    const tabId = message.tabId || (sender.tab && sender.tab.id);
+    if (tabId) {
+      try {
+        showStatusInTab(tabId, message.message, message.count, message.immediate);
+      } catch (err) {
+        debugLog('Error showing status:', err);
+      }
+    }
+    sendResponse({ success: true });
+  }
+
+  // Get site handler stats
+  if (message.type === 'getStats') {
+    const stats = {};
+    Object.entries(siteHandlers).forEach(([site, handler]) => {
+      if (handler.getStats) {
+        stats[site] = handler.getStats();
+      }
+    });
+    sendResponse({ stats });
+    return true;
+  }
+
+  // Look up the handler responsible for a URL.
+  // NOTE(review): findHandlerForUrl and getAllHandlers are not imported in the
+  // visible part of this file. If they are undefined, the ReferenceError is
+  // caught below and the sender receives { found: false, error } - confirm
+  // that an import exists.
+  if (message.type === 'getSiteHandlerForUrl') {
+    try {
+      const url = message.url;
+      const handlerResult = findHandlerForUrl(url);
+
+      if (handlerResult) {
+        const { id } = handlerResult;
+        const handlers = getAllHandlers();
+        const handlerInfo = handlers[id];
+
+        sendResponse({
+          found: true,
+          handler: {
+            id,
+            name: handlerInfo.name,
+            description: handlerInfo.description,
+            version: handlerInfo.version
+          }
+        });
+      } else {
+        sendResponse({ found: false });
+      }
+    } catch (error) {
+      console.error('Error finding handler for URL:', error);
+      sendResponse({ found: false, error: error.message });
+    }
+    return true;
+  }
+
+  // Get all site handlers
+  if (message.type === 'getSiteHandlers') {
+    try {
+      const handlers = getAllHandlers();
+      sendResponse({ handlers });
+    } catch (error) {
+      console.error('Error getting site handlers:', error);
+      sendResponse({ handlers: {} });
+    }
+    return true;
+  }
+
+  // URL visited notification
+  if (message.type === 'urlVisited') {
+    try {
+      const url = message.url;
+      const handlerResult = findHandlerForUrl(url);
+
+      if (handlerResult && typeof handlerResult.handler.onUrlVisited === 'function') {
+        handlerResult.handler.onUrlVisited(url);
+      }
+
+      sendResponse({ success: true });
+    } catch (error) {
+      console.error('Error handling URL visit:', error);
+      sendResponse({ success: false });
+    }
+    return true;
+  }
+
+  // Configuration change notification
+  if (message.type === 'captureConfigChanged') {
+    try {
+      const { config } = message;
+
+      // Update enabled state
+      captureEnabled = !!config.enableScrollCapture;
+
+      // Notify handlers
+      Object.values(siteHandlers).forEach(handler => {
+        if (typeof handler.onConfigChanged === 'function') {
+          handler.onConfigChanged(config);
+        }
+      });
+
+      sendResponse({ success: true });
+    } catch (error) {
+      console.error('Error handling config change:', error);
+      sendResponse({ success: false });
+    }
+    return true;
+  }
+
+  // Nothing asynchronous is pending: close the channel instead of holding it open
+  return false;
+});
+
+/**
+ * Handle click on extension icon
+ */
chrome.action.onClicked.addListener(async (tab) => {
+ debugLog('Extension icon clicked on tab:', tab.url);
+
+ // Don't try to execute script on chrome:// URLs
+ if (tab.url.startsWith('chrome://')) {
+ debugLog('Cannot execute on chrome:// URL, skipping');
+ return;
+ }
+
const entry = {
id: crypto.randomUUID(),
url: tab.url,
@@ -20,30 +260,36 @@ chrome.action.onClicked.addListener(async (tab) => {
favicon: tab.favIconUrl
};
+ debugLog('Created entry from tab click:', entry);
+
// Save the entry first
const { entries = [] } = await chrome.storage.local.get('entries');
entries.push(entry);
await chrome.storage.local.set({ entries });
+ debugLog('Entry saved to local storage');
- // Inject scripts - CSS now handled in popup.js
+ // Inject popup script
+ debugLog('Injecting popup script into tab');
await chrome.scripting.executeScript({
target: { tabId: tab.id },
files: ['popup.js']
+ }).catch(err => {
+ console.error('Error injecting script:', err);
});
});
-chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
- if (message.type === 'archivebox_add') {
- addToArchiveBox(message.body, sendResponse, sendResponse);
+/**
+ * Handle context menu click
+ */
+chrome.contextMenus.onClicked.addListener(async function(item, tab) {
+ debugLog('Context menu save clicked for tab:', tab.url);
+
+ // Don't try to execute script on chrome:// URLs
+ if (tab.url.startsWith('chrome://')) {
+ debugLog('Cannot execute on chrome:// URL, skipping');
+ return;
}
- return true;
-});
-
-
-chrome.contextMenus.onClicked.addListener(onClickContextMenuSave);
-
-// A generic onclick callback function.
-async function onClickContextMenuSave(item, tab) {
+
const entry = {
id: crypto.randomUUID(),
url: tab.url,
@@ -53,20 +299,328 @@ async function onClickContextMenuSave(item, tab) {
favicon: tab.favIconUrl
};
+ debugLog('Created entry from context menu:', entry);
+
// Save the entry first
const { entries = [] } = await chrome.storage.local.get('entries');
entries.push(entry);
await chrome.storage.local.set({ entries });
+ debugLog('Entry saved to local storage');
- // Inject scripts - CSS now handled in popup.js
+ // Inject popup script
+ debugLog('Injecting popup script into tab');
await chrome.scripting.executeScript({
target: { tabId: tab.id },
files: ['popup.js']
+ }).catch(err => {
+ console.error('Error injecting script:', err);
});
-}
+});
+
+/**
+ * Handle extension installation and updates.
+ *
+ * Registers the "Save to ArchiveBox" context menu item, then starts
+ * initializeConfiguration() and initialize(). Both are async and are not
+ * awaited here, so they run concurrently and this listener returns before
+ * either of them has finished.
+ */
chrome.runtime.onInstalled.addListener(function () {
+ debugLog('Extension installed or updated');
+
+ // Create context menu
chrome.contextMenus.create({
id: 'save_to_archivebox_ctxmenu',
title: 'Save to ArchiveBox',
});
+
+ // Set up configuration defaults (writes only the keys that are still undefined)
+ initializeConfiguration();
+
+ // Initialize the extension (loads the capture flag and scans open tabs)
+ initialize();
});
+
+/**
+ * Seed chrome.storage.local with a default for every configuration key
+ * that has never been written. Keys that already hold a value are left
+ * untouched, and nothing is written when no key is missing.
+ */
+async function initializeConfiguration() {
+  // Default for each key, in the order the keys are checked
+  const defaults = {
+    archivebox_server_url: '',
+    archivebox_api_key: '',
+    enableScrollCapture: false,
+    scrollCaptureTags: ''
+  };
+
+  const stored = await chrome.storage.local.get(Object.keys(defaults));
+
+  // Collect only the keys that are still undefined
+  const updates = {};
+  for (const [key, fallback] of Object.entries(defaults)) {
+    if (stored[key] === undefined) {
+      updates[key] = fallback;
+    }
+  }
+
+  if (Object.keys(updates).length === 0) return;
+
+  await chrome.storage.local.set(updates);
+  debugLog('Set default config values:', updates);
+}
+
+/**
+ * Log the id of every newly created tab.
+ */
+chrome.tabs.onCreated.addListener(({ id }) => {
+  // Supported-site detection is left to the tabs.onUpdated listener, which
+  // fires once navigation has completed
+  debugLog('New tab created:', id);
+});
+
+/**
+ * After a tab finishes loading, refresh the capture flag from storage and,
+ * when capture is on, inject the content script of every handler that
+ * claims the tab's URL.
+ */
+chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
+  // Ignore intermediate load states and tabs without a readable URL
+  if (changeInfo.status !== 'complete' || !tab.url) return;
+
+  // Re-read the switch on every completed navigation
+  const stored = await chrome.storage.local.get('enableScrollCapture');
+  captureEnabled = !!stored.enableScrollCapture;
+  if (!captureEnabled) return;
+
+  debugLog('Tab updated, checking for supported sites:', tab.url);
+
+  for (const [site, handler] of Object.entries(siteHandlers)) {
+    const claimsUrl = handler.shouldCaptureUrl && handler.shouldCaptureUrl(tab.url);
+    if (!claimsUrl) continue;
+
+    debugLog(`Detected ${site} site in tab:`, tab.url);
+    if (handler.injectContentScript) {
+      handler.injectContentScript(tabId);
+    }
+  }
+});
+
+/**
+ * Generic entry saving logic for any URL.
+ *
+ * Skips entries whose URL (compared by origin + path, ignoring query string,
+ * fragment and a trailing slash) is already stored, decorates the entry with
+ * automatic, site and user-configured tags, appends it to the `entries`
+ * array in chrome.storage.local and trims that array to CONFIG.MAX_ENTRIES
+ * by dropping the oldest timestamps.
+ *
+ * @param {Object} entry - Candidate entry; only `url` is required
+ * @returns {Promise<{success: boolean, reason?: string}>} Outcome; never rejects
+ */
+async function saveEntry(entry) {
+  try {
+    if (!entry || !entry.url) {
+      debugLog('Invalid entry, not saving', entry);
+      return { success: false, reason: 'Invalid entry' };
+    }
+
+    debugLog('Saving entry:', entry.url);
+
+    // Get current entries
+    const { entries = [] } = await chrome.storage.local.get('entries');
+
+    // Check for duplicates
+    const normalizeUrl = (url) => {
+      try {
+        const normalized = new URL(url);
+        return normalized.origin + normalized.pathname.replace(/\/$/, '');
+      } catch (e) {
+        debugLog('URL normalization error:', e);
+        return url;
+      }
+    };
+
+    const normalizedEntryUrl = normalizeUrl(entry.url);
+    const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl);
+
+    if (existingEntry) {
+      debugLog('URL already exists in entries, skipping:', entry.url);
+      return { success: false, reason: 'URL already exists' };
+    }
+
+    // Add custom tags if configured; drop blanks left by stray or trailing commas
+    const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']);
+    const customTags = scrollCaptureTags ?
+      scrollCaptureTags.split(',').map(tag => tag.trim()).filter(Boolean) : [];
+
+    // Extract site tags
+    const siteTags = getSiteTags(entry.url);
+
+    // Merge all tag sources; the Set removes repeats while keeping first-seen order
+    const tags = [...new Set(['auto-captured', ...siteTags, ...customTags, ...(entry.tags || [])])];
+
+    // Create the full entry object
+    const fullEntry = {
+      id: entry.id || crypto.randomUUID(),
+      url: entry.url,
+      timestamp: entry.timestamp || new Date().toISOString(),
+      tags,
+      title: entry.title || 'Captured content',
+      notes: entry.notes || `Auto-captured content: ${entry.url}`,
+      favicon: entry.favicon
+    };
+
+    // Add to entries
+    entries.push(fullEntry);
+
+    // Limit entries if exceeding maximum
+    if (entries.length > CONFIG.MAX_ENTRIES) {
+      // Sort by timestamp (oldest first) and remove excess
+      entries.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp));
+      const removed = entries.splice(0, entries.length - CONFIG.MAX_ENTRIES);
+      debugLog(`Removed ${removed.length} oldest entries to stay under limit`);
+    }
+
+    // Save entries
+    await chrome.storage.local.set({ entries });
+    debugLog('Entry saved to local storage');
+
+    return { success: true };
+  } catch (e) {
+    debugLog('Error saving entry:', e);
+    return { success: false, reason: e.message };
+  }
+}
+
+/**
+ * Extract site name for tagging.
+ *
+ * Takes the URL's hostname, removes a leading "www." and a trailing
+ * .com/.org/.net/.io/.gov/.edu suffix, and returns what is left as a
+ * single-element tag list.
+ *
+ * @param {string} url - Absolute URL to derive the tag from
+ * @returns {string[]} One tag, or an empty list when the URL cannot be
+ *   parsed or has no hostname (e.g. file: URLs)
+ */
+function getSiteTags(url) {
+  try {
+    const { hostname } = new URL(url);
+    const domain = hostname
+      // Anchored so only a real "www." prefix is stripped, not a "www." found mid-host
+      .replace(/^www\./, '')
+      .replace(/\.(com|org|net|io|gov|edu)$/, '');
+    // Never hand back an empty-string tag
+    return domain ? [domain] : [];
+  } catch (e) {
+    debugLog('Error extracting site tags:', e);
+    return [];
+  }
+}
+
+/**
+ * Show status message in tab.
+ *
+ * Makes sure the floating status widget exists in the tab, pushes `message`
+ * onto the tab's five-item status queue, re-renders the queue together with
+ * the capture count and fades the widget out again after
+ * CONFIG.STATUS_DISPLAY_TIME milliseconds. Every failure is logged through
+ * debugLog and swallowed, so this function never rejects.
+ *
+ * @param {number} tabId - Tab to show the message in
+ * @param {string} message - Text to display; HTML-escaped before rendering
+ * @param {number} [count] - Capture count shown under the messages (0 when falsy)
+ * @param {boolean} [immediate=false] - Passed to the injected function, which currently ignores it
+ */
+async function showStatusInTab(tabId, message, count, immediate = false) {
+  try {
+    // Check if tab still exists before proceeding
+    try {
+      const tab = await chrome.tabs.get(tabId);
+      if (!tab) {
+        debugLog(`Tab ${tabId} no longer exists, skipping status update`);
+        return;
+      }
+    } catch (e) {
+      debugLog(`Tab ${tabId} error or no longer exists:`, e.message);
+      return;
+    }
+
+    // Setup status indicator if not already present; stop here when that fails,
+    // because the second injection has nothing to render into
+    try {
+      await chrome.scripting.executeScript({
+        target: { tabId: tabId },
+        function: setupStatusIndicator,
+      });
+    } catch (err) {
+      debugLog(`Error setting up status indicator in tab ${tabId}:`, err.message);
+      return;
+    }
+
+    // Show the status message. The function below is serialized and runs inside
+    // the page, so everything it needs is passed through `args`.
+    await chrome.scripting.executeScript({
+      target: { tabId: tabId },
+      args: [message, count || 0, immediate, CONFIG.STATUS_DISPLAY_TIME],
+      function: (message, count, immediate, displayTime) => {
+        // Escape text before it is placed into innerHTML below
+        const escapeHtml = (text) =>
+          String(text).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
+
+        // Add to status queue
+        if (!window.archiveBoxStatusQueue) window.archiveBoxStatusQueue = [];
+        window.archiveBoxStatusQueue.unshift(message);
+
+        // Keep only 5 items
+        if (window.archiveBoxStatusQueue.length > 5) {
+          window.archiveBoxStatusQueue = window.archiveBoxStatusQueue.slice(0, 5);
+        }
+
+        // Show status
+        const indicator = document.getElementById('archiveBoxStatusIndicator');
+        const messageContainer = document.getElementById('archiveBoxStatusMessages');
+        const countIndicator = document.getElementById('archiveBoxStatusCount');
+
+        if (indicator && messageContainer && countIndicator) {
+          // Update message list
+          messageContainer.innerHTML = window.archiveBoxStatusQueue.map(msg =>
`
• ${escapeHtml(msg)}
`
+          ).join('');
+
+          // Update count
+          countIndicator.textContent = `Captured ${count} posts`;
+
+          // Show indicator
+          indicator.style.opacity = '1';
+
+          // Auto hide
+          clearTimeout(window.archiveBoxStatusTimeout);
+          window.archiveBoxStatusTimeout = setTimeout(() => {
+            indicator.style.opacity = '0';
+          }, displayTime);
+        }
+      }
+    }).catch(err => {
+      debugLog(`Error showing status in tab ${tabId}:`, err.message);
+    });
+  } catch (err) {
+    debugLog('Error showing status:', err);
+  }
+}
+
+/**
+ * Build the floating status widget inside the current page.
+ *
+ * Passed to chrome.scripting.executeScript, so it only touches `document`
+ * and `window`. Does nothing when the widget is already present; otherwise
+ * creates the outer box, a message list and a count line, and resets the
+ * page's status queue.
+ */
+function setupStatusIndicator() {
+  // Widget already injected into this page
+  if (document.getElementById('archiveBoxStatusIndicator')) return;
+
+  // Outer box, created invisible (opacity 0)
+  const box = document.createElement('div');
+  box.id = 'archiveBoxStatusIndicator';
+  box.style.cssText = `
+ position: fixed;
+ bottom: 10px;
+ right: 10px;
+ background: rgba(0, 0, 0, 0.7);
+ color: white;
+ padding: 10px;
+ border-radius: 5px;
+ font-size: 12px;
+ z-index: 9999;
+ transition: opacity 0.5s;
+ opacity: 0;
+ max-width: 300px;
+ max-height: 200px;
+ overflow-y: auto;
+ line-height: 1.3;
+ `;
+  document.body.appendChild(box);
+
+  // Container that receives the rendered message list
+  const messages = document.createElement('div');
+  messages.id = 'archiveBoxStatusMessages';
+  box.appendChild(messages);
+
+  // Line that shows the running capture count
+  const counter = document.createElement('div');
+  counter.id = 'archiveBoxStatusCount';
+  counter.style.cssText = `
+ margin-top: 5px;
+ font-weight: bold;
+ text-align: center;
+ border-top: 1px solid rgba(255, 255, 255, 0.3);
+ padding-top: 5px;
+ `;
+  box.appendChild(counter);
+
+  // Start with an empty status queue
+  window.archiveBoxStatusQueue = [];
+}
+
+// Initialize on startup. initialize() is async, so a failure (for example a
+// rejected storage read) is logged here instead of becoming an unhandled rejection.
+chrome.runtime.onStartup.addListener(() => {
+  debugLog('Extension started');
+  initialize().catch((err) => {
+    console.error('Error initializing background script:', err);
+  });
+});
+
diff --git a/config-tab.js b/config-tab.js
index 42a897c..7635215 100755
--- a/config-tab.js
+++ b/config-tab.js
@@ -1,5 +1,6 @@
// Config tab initialization and handlers
import { updateStatusIndicator, syncToArchiveBox, getArchiveBoxServerUrl } from './utils.js';
+import { getAllHandlers, getAllStats } from './site-handlers.js';
export async function initializeConfigTab() {
const configForm = document.getElementById('configForm');
@@ -15,10 +16,9 @@ export async function initializeConfigTab() {
'match_urls',
'exclude_urls',
]);
- console.log('Got config values from storage:', archivebox_server_url, archivebox_api_key, match_urls, exclude_urls);
// migrate old config_archiveboxBaseUrl to archivebox_server_url
- const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveboxBaseUrl', );
+ const {config_archiveBoxBaseUrl} = await chrome.storage.sync.get('config_archiveBoxBaseUrl', );
if (config_archiveBoxBaseUrl) {
await chrome.storage.local.set({ archivebox_server_url: config_archiveBoxBaseUrl });
}
@@ -209,7 +209,7 @@ export async function initializeConfigTab() {
};
const result = await syncToArchiveBox(testEntry);
- document.getElementById('inprogress-test').remove();
+ document.getElementById('inprogress-test')?.remove();
if (result.ok) {
testStatus.innerHTML += `
@@ -241,6 +241,278 @@ export async function initializeConfigTab() {
testButton.click();
}
});
+
+ // Initialize site-specific capture settings
+ await initializeSiteCapture();
+}
+
+/**
+ * Wire up the content-capture section of the options page: load the saved
+ * switch and tag list into their inputs, persist changes, broadcast the
+ * switch state to every tab, then build the Reddit settings, the handler
+ * table and the capture statistics.
+ */
+async function initializeSiteCapture() {
+  const enableScrollCapture = document.getElementById('enableScrollCapture');
+  const scrollCaptureTags = document.getElementById('scrollCaptureTags');
+
+  // Load the persisted values in a single storage read
+  const stored = await chrome.storage.local.get([
+    'enableScrollCapture',
+    'scrollCaptureTags',
+    'redditCaptureConfig'
+  ]);
+
+  enableScrollCapture.checked = !!stored.enableScrollCapture;
+  scrollCaptureTags.value = stored.scrollCaptureTags || '';
+
+  // Persist the switch and tell every open tab about the new state
+  enableScrollCapture.addEventListener('change', async () => {
+    await chrome.storage.local.set({ enableScrollCapture: enableScrollCapture.checked });
+
+    const openTabs = await chrome.tabs.query({});
+    openTabs.forEach((tab) => {
+      try {
+        chrome.tabs.sendMessage(tab.id, {
+          type: 'captureStatusChanged',
+          enabled: enableScrollCapture.checked
+        }).catch(() => {/* Ignore errors for tabs that don't have the content script */});
+      } catch (e) {
+        // Ignore errors for tabs that don't have the content script
+      }
+    });
+  });
+
+  // Persist the tag list whenever it is edited
+  scrollCaptureTags.addEventListener('change', async () => {
+    await chrome.storage.local.set({ scrollCaptureTags: scrollCaptureTags.value });
+  });
+
+  // Reddit-specific settings
+  await initializeRedditSettings(stored.redditCaptureConfig);
+
+  // Registered site handlers table
+  populateSiteHandlersInfo();
+
+  // Capture statistics, plus the button that refreshes them
+  await updateCaptureStats();
+  document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats);
+}
+
+/**
+ * Initialize Reddit-specific settings.
+ *
+ * Merges the saved configuration over the defaults, renders the settings
+ * form into #redditSettingsContainer (no-op when that element is missing)
+ * and saves the form to `redditCaptureConfig` in chrome.storage.local when
+ * the save button is clicked.
+ *
+ * @param {Object} [savedConfig] - Previously stored redditCaptureConfig, if any
+ */
+async function initializeRedditSettings(savedConfig) {
+  // Default configuration
+  const defaultConfig = {
+    captureSubreddits: true,
+    capturePostDetails: true,
+    captureComments: false,
+    commentsDepth: 2,
+    excludedSubreddits: [],
+    includedSubreddits: [],
+    maxProcessedPosts: 1000
+  };
+
+  // Merge saved config with defaults
+  const config = { ...defaultConfig, ...(savedConfig || {}) };
+
+  // Create Reddit-specific settings UI if it doesn't exist
+  const redditSettingsContainer = document.getElementById('redditSettingsContainer');
+  if (!redditSettingsContainer) {
+    return; // Element doesn't exist, can't add settings
+  }
+
+  // Build the Reddit settings UI
+  redditSettingsContainer.innerHTML = `
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Maximum number of post IDs to keep in memory (100-10000)
+
+
+
+
+
+
+
+
+
Only posts from these subreddits will be captured
+
+
+
+
+
Posts from these subreddits will never be captured
+
+
+
+
+
+
+ `;
+
+  // Read a numeric input; an empty or non-numeric field falls back to the
+  // default instead of letting NaN reach storage
+  const readInt = (elementId, fallback) => {
+    const parsed = Number.parseInt(document.getElementById(elementId).value, 10);
+    return Number.isNaN(parsed) ? fallback : parsed;
+  };
+
+  // Turn a comma-separated field into a list of lower-cased, non-empty names
+  const readList = (elementId) => document.getElementById(elementId).value
+    .split(',')
+    .map(s => s.trim().toLowerCase())
+    .filter(s => s);
+
+  // Add event listener for saving settings
+  document.getElementById('saveRedditSettings').addEventListener('click', async () => {
+    // Collect the current settings
+    const newConfig = {
+      captureSubreddits: document.getElementById('redditCaptureSubreddits').checked,
+      capturePostDetails: document.getElementById('redditCapturePostDetails').checked,
+      captureComments: document.getElementById('redditCaptureComments').checked,
+      commentsDepth: readInt('redditCommentsDepth', defaultConfig.commentsDepth),
+      maxProcessedPosts: readInt('redditMaxProcessedPosts', defaultConfig.maxProcessedPosts),
+      includedSubreddits: readList('redditIncludedSubreddits'),
+      excludedSubreddits: readList('redditExcludedSubreddits')
+    };
+
+    // Validate settings: keep the post limit inside the advertised 100-10000 range
+    newConfig.maxProcessedPosts = Math.min(10000, Math.max(100, newConfig.maxProcessedPosts));
+
+    // Save the settings
+    await chrome.storage.local.set({ redditCaptureConfig: newConfig });
+
+    // Show success message
+    alert('Reddit settings saved successfully');
+  });
+}
+
+/**
+ * Populate site handlers information.
+ *
+ * Renders one table row per entry returned by getAllHandlers() into
+ * #siteHandlersContainer, showing each handler's name, domains, version
+ * and description. Does nothing when the container is not on the page.
+ * Handler fields are interpolated into innerHTML without escaping.
+ */
+function populateSiteHandlersInfo() {
+ const handlersContainer = document.getElementById('siteHandlersContainer');
+ if (!handlersContainer) return;
+
+ const handlers = getAllHandlers();
+
+ // Create the handlers info UI
+ handlersContainer.innerHTML = `
+
+
+
+
+
+
+ Handler |
+ Domains |
+ Version |
+ Description |
+
+
+
+ ${Object.entries(handlers).map(([id, handler]) => `
+
+ ${handler.name} |
+ ${handler.domains.join(', ')} |
+ ${handler.version} |
+ ${handler.description} |
+
+ `).join('')}
+
+
+
+
+ `;
+}
+
+/**
+ * Update capture stats.
+ *
+ * Sends a `getStats` message to the background script, then renders one
+ * section per site into #captureStatsContainer with one list item per
+ * statistic. Stat keys are shown with a space before each capital letter
+ * and the first character upper-cased. A missing response is treated as
+ * "no stats". Does nothing when the container is not on the page.
+ */
+async function updateCaptureStats() {
+ const statsContainer = document.getElementById('captureStatsContainer');
+ if (!statsContainer) return;
+
+ // Get stats from all handlers (resolves to {} when the response is missing)
+ const stats = await new Promise(resolve => {
+ chrome.runtime.sendMessage({ type: 'getStats' }, response => {
+ resolve(response?.stats || {});
+ });
+ });
+
+ // Create the stats UI
+ statsContainer.innerHTML = `
+
+
+
+
+ ${Object.entries(stats).map(([site, siteStats]) => `
+
+
+
+
+
+ ${Object.entries(siteStats).map(([key, value]) => `
+ -
+ ${key.replace(/([A-Z])/g, ' $1').replace(/^./, str => str.toUpperCase())}
+ ${value}
+
+ `).join('')}
+
+
+
+
+ `).join('')}
+
+
+
+ `;
+
+ // Re-attach the refresh button event listener (same function reference on every call)
+ document.getElementById('refreshCaptureStats')?.addEventListener('click', updateCaptureStats);
}
// Using shared syncToArchiveBox function from utils.js
diff --git a/entries-tab.js b/entries-tab.js
index 9481646..af02568 100755
--- a/entries-tab.js
+++ b/entries-tab.js
@@ -1,4 +1,38 @@
import { filterEntries, addToArchiveBox, downloadCsv, downloadJson, syncToArchiveBox, updateStatusIndicator, getArchiveBoxServerUrl } from './utils.js';
+import { getAllHandlers, shouldCaptureUrl } from './site-handlers.js';
+
+/**
+ * Get site handler information for an entry
+ *
+ * Asks the background script which site handler is responsible for the
+ * entry's URL. The request is awaited inside the try block, so a failed
+ * message (for example when no listener answers) is logged and reported
+ * as "no handler" instead of escaping as an unhandled error.
+ *
+ * @param {Object} entry - The entry to get handler info for
+ * @return {Object|null} Handler info if found
+ */
+async function getSiteHandlerForEntry(entry) {
+  if (!entry || !entry.url) return null;
+
+  try {
+    // Send message to background script
+    const response = await chrome.runtime.sendMessage({
+      type: 'getSiteHandlerForUrl',
+      url: entry.url
+    });
+
+    return response?.handler || null;
+  } catch (error) {
+    console.error('Error getting site handler for entry:', error);
+    return null;
+  }
+}
+
+/**
+ * Pick the emoji shown next to a site handler's name.
+ *
+ * A Map is used for the lookup so that ids which happen to match inherited
+ * object members (such as "toString" or "constructor") fall back to the
+ * default icon instead of returning a function.
+ *
+ * @param {string} handlerId - Site handler id, e.g. "reddit"
+ * @return {string} Emoji for the handler, or the globe icon when unknown
+ */
+function getSiteHandlerIcon(handlerId) {
+  const icons = new Map([
+    ['reddit', '💬'],
+    ['twitter', '🐦'],
+    ['youtube', '▶️'],
+    ['default', '🌐']
+  ]);
+
+  return icons.get(handlerId) ?? icons.get('default');
+}
export async function renderEntries(filterText = '', tagFilter = '') {
const { entries = [] } = await chrome.storage.local.get('entries');
@@ -16,23 +50,85 @@ export async function renderEntries(filterText = '', tagFilter = '') {
// Display filtered entries
const entriesList = document.getElementById('entriesList');
- entriesList.innerHTML = filteredEntries.map(entry => `
-
-
-
- ${new Date(entry.timestamp).toISOString().replace('T', ' ').split('.')[0]}
-
-
-
- ${entry.tags.length ? `
-
- ${entry.tags.map(tag =>
- `${tag}`
- ).join('')}
-
- ` : ''}
+ // Add a custom style for site handler badges if not already present
+ if (!document.getElementById('siteHandlerStyles')) {
+ const style = document.createElement('style');
+ style.id = 'siteHandlerStyles';
+ style.textContent = `
+ .site-handler-badge {
+ display: inline-flex;
+ align-items: center;
+ padding: 2px 6px;
+ font-size: 0.7rem;
+ background-color: #e3f2fd;
+ color: #0d6efd;
+ border-radius: 4px;
+ margin-right: 8px;
+ }
+
+ .site-handler-icon {
+ margin-right: 2px;
+ }
+ `;
+ document.head.appendChild(style);
+ }
+
+ // Get site handler info for each entry
+ const entryHandlers = await Promise.all(
+ filteredEntries.map(async entry => {
+ return {
+ entry,
+ handler: await getSiteHandlerForEntry(entry)
+ };
+ })
+ );
+
+ entriesList.innerHTML = entryHandlers.map(({ entry, handler }) => `
+
+
+
+
+
+ ${handler ?
+ `
+ ${getSiteHandlerIcon(handler.id)}
+ ${handler.name}
+ ` : ''
+ }
+ ${entry.title || 'Untitled'}
+
+ ${(()=>{
+ return archivebox_server_url ?
+ `
`
+ : '' })()
+ }
+
+
+

+
${entry.url}
+
+ ${new Date(entry.timestamp).toLocaleString()}
+
+
+
+ ${entry.tags.map(tag =>
+ `${tag}`
+ ).join('')}
@@ -330,9 +426,58 @@ export function initializeEntriesTab() {
window.history.pushState({}, '', newUrl);
}
+ /**
+ * Render the tags list sidebar with frequency counts and site filters
+ * @param {Array} filteredEntries - The currently filtered entries
+ */
async function renderTagsList(filteredEntries) {
const tagsList = document.getElementById('tagsList');
+ // Add site handler filters
+ const handlers = getAllHandlers();
+
+ // Check if we have entries from supported sites
+ const siteCount = {};
+
+ filteredEntries.forEach(entry => {
+ Object.entries(handlers).forEach(([id, handler]) => {
+ if (handler.domains.some(domain => entry.url.includes(domain))) {
+ siteCount[id] = (siteCount[id] || 0) + 1;
+ }
+ });
+ });
+
+ // Start with site filters if we have entries from supported sites
+ let tagsListHTML = '';
+
+ if (Object.keys(siteCount).length > 0) {
+ tagsListHTML += '
Sites
';
+
+ // Get current filter to highlight active site if any
+ const currentFilter = document.getElementById('filterInput').value.toLowerCase();
+
+ // Add site filters sorted by count
+ tagsListHTML += Object.entries(siteCount)
+ .sort(([, countA], [, countB]) => countB - countA)
+ .map(([siteId, count]) => {
+ const handler = handlers[siteId];
+ const isActive = currentFilter === `site:${siteId}`;
+
+ return `
+
+
+ ${getSiteHandlerIcon(siteId)} ${handler.name}
+
+ ${count}
+
+ `;
+ }).join('');
+
+ tagsListHTML += '
Tags
';
+ }
+
// Count occurrences of each tag in filtered entries only
const tagCounts = filteredEntries.reduce((acc, entry) => {
entry.tags.forEach(tag => {
@@ -340,19 +485,19 @@ export function initializeEntriesTab() {
});
return acc;
}, {});
-
+
// Sort tags by frequency (descending) then alphabetically
const sortedTags = Object.entries(tagCounts)
.sort(([tagA, countA], [tagB, countB]) => {
if (countB !== countA) return countB - countA;
return tagA.localeCompare(tagB);
});
-
+
// Get current filter to highlight active tag if any
const currentFilter = document.getElementById('filterInput').value.toLowerCase();
-
- // Render tags list with counts
- tagsList.innerHTML = sortedTags.map(([tag, count]) => `
+
+ // Add tags with counts
+ tagsListHTML += sortedTags.map(([tag, count]) => `
@@ -360,7 +505,10 @@ export function initializeEntriesTab() {
${count}
`).join('');
-
+
+ // Set the HTML
+ tagsList.innerHTML = tagsListHTML;
+
// Add click handlers for tag filtering
tagsList.querySelectorAll('.tag-filter').forEach(tagElement => {
tagElement.addEventListener('click', (e) => {
@@ -378,6 +526,24 @@ export function initializeEntriesTab() {
renderEntries();
});
});
+
+ // Add click handlers for site filtering
+ tagsList.querySelectorAll('.site-filter').forEach(siteElement => {
+ siteElement.addEventListener('click', (e) => {
+ e.preventDefault();
+ const site = siteElement.dataset.site;
+ const filterInput = document.getElementById('filterInput');
+
+ // Toggle site filter
+ if (filterInput.value.toLowerCase() === `site:${site}`) {
+ filterInput.value = ''; // Clear filter if clicking active site
+ } else {
+ filterInput.value = `site:${site}`;
+ }
+
+ renderEntries();
+ });
+ });
}
// Modify existing renderEntries function
diff --git a/manifest.json b/manifest.json
index 4f32a09..3082cb8 100755
--- a/manifest.json
+++ b/manifest.json
@@ -8,15 +8,24 @@
"scripting",
"activeTab",
"contextMenus",
- "unlimitedStorage"
+ "unlimitedStorage",
+ "webRequest",
+ "tabs",
+ "webNavigation"
],
"optional_permissions": [
"cookies",
"history",
"bookmarks"
],
- "optional_host_permissions": [
- "*://*\/*"
+ "host_permissions": [
+ "
"
+ ],
+ "content_scripts": [
+ {
+ "matches": ["*://*.reddit.com/*"],
+ "js": ["reddit-content.js"]
+ }
],
"icons": {
"16": "16.png",
@@ -39,8 +48,8 @@
"type": "module"
},
"web_accessible_resources": [{
- "resources": ["popup.css", "popup.js"],
- "matches": ["*://*\/*"]
+ "resources": ["popup.css", "popup.js", "site-handlers.js", "reddit-handler.js"],
+ "matches": ["*://*/*"]
}],
"commands": {
"save-to-archivebox-action": {
diff --git a/options.html b/options.html
index 73f27e6..03bca06 100755
--- a/options.html
+++ b/options.html
@@ -299,6 +299,35 @@ Advanced Users Only: Auto-archive URLs
+
+
+
Content Capture Settings
+
+
+
+
+
+ When enabled, the extension will automatically detect and save content from supported sites as you browse them.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/options.js b/options.js
index 7c9c4e2..f01e9f3 100755
--- a/options.js
+++ b/options.js
@@ -3,15 +3,20 @@ import { initializeImport } from './import-tab.js';
import { initializePersonasTab } from './personas-tab.js';
import { initializeCookiesTab } from './cookies-tab.js';
import { initializeConfigTab } from './config-tab.js';
+import { initializeAll as initializeAllSiteHandlers } from './site-handlers.js';
// Initialize all tabs when options page loads
-document.addEventListener('DOMContentLoaded', () => {
+document.addEventListener('DOMContentLoaded', async () => {
+ // Initialize all tabs
initializeEntriesTab();
initializeImport();
initializePersonasTab();
initializeCookiesTab();
initializeConfigTab();
+ // Initialize site handlers
+ await initializeAllSiteHandlers();
+
function changeTab() {
if (window.location.hash && window.location.hash !== document.querySelector('a.nav-link.active').id) {
console.log('Changing tab based on URL hash:', window.location.hash, `a.nav-link${window.location.hash}`, document.querySelector(`a.nav-link${window.location.hash}`));
diff --git a/popup.js b/popup.js
index cb5b74f..245e1ee 100755
--- a/popup.js
+++ b/popup.js
@@ -3,6 +3,8 @@
const IS_IN_POPUP = window.location.href.startsWith('chrome-extension://') && window.location.href.endsWith('/popup.html');
const IS_ON_WEBSITE = !window.location.href.startsWith('chrome-extension://');
+window.handler_stats = null; // Global stats reference
+
window.popup_element = null; // Global reference to popup element
window.hide_timer = null;
@@ -65,6 +67,33 @@ async function sendToArchiveBox(url, tags) {
return { ok: ok, status: status};
}
+async function getSiteHandlerInfo(url) {
+ try {
+ if (!url) return null;
+
+ const response = await chrome.runtime.sendMessage({
+ type: 'getSiteHandlerForUrl',
+ url
+ });
+
+ return response?.handler || null;
+ } catch (error) {
+ console.log('Failed to get site handler info:', error);
+ return null;
+ }
+}
+
+async function getHandlerStats() {
+ try {
+ const response = await chrome.runtime.sendMessage({ type: 'getStats' });
+ window.handler_stats = response?.stats || {};
+ return window.handler_stats;
+ } catch (error) {
+ console.log('Failed to get handler stats:', error);
+ return {};
+ }
+}
+
window.getCurrentEntry = async function() {
const { entries = [] } = await chrome.storage.local.get('entries');
let current_entry = entries.find(entry => entry.url === window.location.href);
@@ -411,6 +440,7 @@ window.createPopup = async function() {
🏛️
+
Saved locally...
@@ -603,6 +633,56 @@ window.createPopup = async function() {
selectedIndex = -1;
}
});
+ // Check if this URL has a specific handler and show info
+ const siteHandlerInfo = await getSiteHandlerInfo(current_entry.url);
+ const statsContainer = popup.querySelector('.site-handler-info');
+
+ if (siteHandlerInfo) {
+ // Update the style for the handler info
+ const style = doc.createElement('style');
+ style.textContent += `
+ .site-handler-info {
+ font-size: 12px;
+ margin-bottom: 8px;
+ color: #f0f0f0;
+ }
+
+ .handler-stats {
+ display: flex;
+ gap: 8px;
+ margin-top: 4px;
+ }
+
+ .stat-item {
+ background: rgba(255, 255, 255, 0.1);
+ padding: 2px 6px;
+ border-radius: 4px;
+ font-size: 10px;
+ }
+ `;
+ doc.head.appendChild(style);
+
+ // Show handler info
+ statsContainer.innerHTML = `
+ This page uses the ${siteHandlerInfo.name} handler for enhanced capture.
+
+ ${siteHandlerInfo.id === 'reddit' ? 'Reddit-specific options available in settings' : ''}
+
+ `;
+
+ // Get stats if available
+ const stats = await getHandlerStats();
+ const handlerStats = stats[siteHandlerInfo.id];
+
+ if (handlerStats) {
+ const statsRow = statsContainer.querySelector('.handler-stats');
+ if (handlerStats.captureCount) {
+ statsRow.innerHTML += `Captured: ${handlerStats.captureCount}`;
+ }
+ }
+ } else {
+ statsContainer.style.display = 'none';
+ }
input.focus();
console.log('+ Showed ArchiveBox popup in iframe');
@@ -657,6 +737,22 @@ window.createPopup = async function() {
// Initial resize
setTimeout(resizeIframe, 0);
+
+ notifyUrlVisit(current_entry.url);
}
window.createPopup();
+
+// Function to notify background script about URL visit
+async function notifyUrlVisit(url) {
+ if (!url) return;
+
+ try {
+ await chrome.runtime.sendMessage({
+ type: 'urlVisited',
+ url
+ });
+ } catch (error) {
+ // Ignore any errors
+ }
+}
diff --git a/reddit-content.js b/reddit-content.js
new file mode 100644
index 0000000..d4283fd
--- /dev/null
+++ b/reddit-content.js
@@ -0,0 +1,365 @@
+// reddit-content.js
+// Content script for detecting Reddit posts in the viewport with improved architecture
+
+// Configuration for the Reddit content script (viewport detection and capture queue)
+const CONFIG = {
+ OBSERVATION_THRESHOLD: 0.4, // Post must be 40% visible to trigger capture; also passed as the IntersectionObserver threshold
+ ROOT_MARGIN: "100px", // Extend detection area beyond viewport (IntersectionObserver rootMargin)
+ QUEUE_PROCESS_DELAY: 100, // Delay in ms between processing items in queue
+ MUTATION_OBSERVER_DELAY: 150, // Delay in ms after DOM changes before finding new posts
+ MAX_PROCESSED_POSTS: 1000, // Maximum number of processed post IDs to store
+ DEBUG_MODE: true // Enable debug logging via debugLog()
+};
+
+// State management
+const state = {
+ observedPosts: new Set(), // Posts we've already seen and processed
+ postQueue: [], // Queue of posts to process in positional order
+ isProcessingQueue: false, // Whether we're currently processing the queue
+ captureCount: 0, // Number of posts captured in this session
+ isEnabled: false, // Whether capture is enabled
+ isInitialized: false // Whether we've initialized the system
+};
+
+/**
+ * Debug logging
+ */
+function debugLog(...args) {
+ if (CONFIG.DEBUG_MODE) {
+ console.log('[ArchiveBox Reddit]', ...args);
+ }
+}
+
+/**
+ * Process posts in order from top to bottom of page
+ */
+function processNextPost() {
+ if (state.postQueue.length === 0) {
+ state.isProcessingQueue = false;
+ return;
+ }
+
+ state.isProcessingQueue = true;
+
+ // Sort post queue by Y position (top to bottom)
+ state.postQueue.sort((a, b) => a.position - b.position);
+
+ // Process the topmost post
+ const postToProcess = state.postQueue.shift();
+ capturePost(postToProcess.postElement, postToProcess.postId);
+
+ // Continue processing the queue with a small delay to prevent UI blocking
+ setTimeout(processNextPost, CONFIG.QUEUE_PROCESS_DELAY);
+}
+
+/**
+ * Queue a post for capture based on its position in the viewport
+ */
+function queuePostForCapture(postElement, postId) {
+ // Get the vertical position of the post
+ const rect = postElement.getBoundingClientRect();
+ const position = rect.top;
+
+ // Add to queue with position data
+ state.postQueue.push({
+ postElement,
+ postId,
+ position
+ });
+
+ // Start processing queue if not already running
+ if (!state.isProcessingQueue) {
+ processNextPost();
+ }
+}
+
+/**
+ * Extract useful information from a post element
+ */
+function extractPostData(postElement, postId) {
+ // Extract post details - try different selectors to handle Reddit's different UI versions
+ const titleElement = postElement.querySelector(
+ 'h1, h3, [data-testid="post-title"], [data-click-id="body"] h2, a.title'
+ );
+
+ const linkElement = postElement.querySelector(
+ 'a.title, [data-click-id="body"], a[data-click-id="comments"], [data-testid="post-title"] a'
+ );
+
+ if (!titleElement) {
+ debugLog('Could not find title element in post:', postId);
+ return null;
+ }
+
+ // Get title
+ const title = titleElement.textContent.trim();
+
+ // Get permalink/URL
+ let url = '';
+ if (linkElement && linkElement.href) {
+ url = linkElement.href;
+ } else {
+ // Try to construct URL from post ID if it matches Reddit's post ID format
+ const redditId = postId.replace('t3_', '');
+ if (redditId.length >= 6) {
+ // Try to extract subreddit
+ const subredditElement = postElement.querySelector('a[href^="/r/"]');
+ const subredditName = subredditElement ? subredditElement.textContent.replace('r/', '') : '';
+
+ if (subredditName) {
+ url = `https://www.reddit.com/r/${subredditName}/comments/${redditId}/`;
+ } else {
+ url = `https://www.reddit.com/comments/${redditId}/`;
+ }
+ }
+ }
+
+ if (!title || !url) {
+ debugLog('Insufficient data for post, skipping');
+ return null;
+ }
+
+ // Get subreddit
+ const subredditElement = postElement.querySelector('a[href^="/r/"]');
+ const subreddit = subredditElement ? subredditElement.textContent.replace('r/', '') : '';
+
+ return {
+ url,
+ title,
+ subreddit
+ };
+}
+
+/**
+ * Capture post data and send to background script
+ */
+function capturePost(postElement, postId) {
+ // Only capture the post if we haven't already processed it
+ if (state.observedPosts.has(postId)) return;
+
+ // Mark as processed and manage the max size of observedPosts
+ state.observedPosts.add(postId);
+ if (state.observedPosts.size > CONFIG.MAX_PROCESSED_POSTS) {
+ // Remove oldest entries (approximation since Sets don't guarantee order)
+ const excess = state.observedPosts.size - CONFIG.MAX_PROCESSED_POSTS;
+ const entries = Array.from(state.observedPosts).slice(0, excess);
+ entries.forEach(entry => state.observedPosts.delete(entry));
+ debugLog(`Pruned ${excess} old post IDs from observed set`);
+ }
+
+ // Extract post data
+ const postData = extractPostData(postElement, postId);
+ if (!postData) return;
+
+ // Increment capture count
+ state.captureCount++;
+
+ // Send to background script with high priority
+ chrome.runtime.sendMessage({
+ type: 'capture',
+ entry: {
+ url: postData.url,
+ title: postData.title,
+ tags: ['reddit', postData.subreddit, 'viewport-captured'].filter(Boolean),
+ timestamp: new Date().toISOString(),
+ priority: 'high' // Mark as high priority
+ }
+ });
+
+ // Add visual indicator to the post
+ addVisualIndicator(postElement);
+
+ // Show status immediately
+ chrome.runtime.sendMessage({
+ type: 'showStatus',
+ message: `Captured: ${postData.title.substring(0, 40)}...`,
+ count: state.captureCount,
+ immediate: true // Request immediate display
+ });
+
+ debugLog('Captured post in viewport:', postData.title, postData.url);
+}
+
+/**
+ * Add a small visual indicator to show the post has been captured
+ */
+function addVisualIndicator(postElement) {
+ // Create indicator if it doesn't exist
+ if (!postElement.querySelector('.archivebox-captured-indicator')) {
+ const indicator = document.createElement('div');
+ indicator.className = 'archivebox-captured-indicator';
+ indicator.style.cssText = `
+ position: absolute;
+ top: 0;
+ right: 0;
+ background: rgba(0, 128, 0, 0.6);
+ color: white;
+ font-size: 10px;
+ padding: 2px 5px;
+ border-radius: 0 0 0 3px;
+ z-index: 9999;
+ `;
+ indicator.textContent = '✓ Archived';
+
+ // Make sure the post has a relative position for absolute positioning to work
+ if (getComputedStyle(postElement).position === 'static') {
+ postElement.style.position = 'relative';
+ }
+
+ postElement.appendChild(indicator);
+ }
+}
+
+/**
+ * Set up intersection observer to detect posts as they become visible
+ */
+function setupObserver() {
+ debugLog('Setting up viewport observer for Reddit');
+
+ const observer = new IntersectionObserver((entries) => {
+ entries.forEach(entry => {
+ if (entry.isIntersecting && entry.intersectionRatio >= CONFIG.OBSERVATION_THRESHOLD) {
+ const postElement = entry.target;
+
+ // Extract post ID to avoid processing the same post multiple times
+ const postId = postElement.id ||
+ postElement.getAttribute('data-post-id') ||
+ postElement.getAttribute('data-fullname') ||
+ postElement.getAttribute('id');
+
+ if (!postId) return;
+
+ // Queue for processing in top-to-bottom order
+ queuePostForCapture(postElement, postId);
+ }
+ });
+ }, {
+ threshold: CONFIG.OBSERVATION_THRESHOLD,
+ rootMargin: CONFIG.ROOT_MARGIN
+ });
+
+ // Find and observe posts
+ function findAndObservePosts() {
+ // Attempt to find posts using different selectors for different Reddit versions
+ const postSelectors = [
+ // Current "new" Reddit redesign
+ 'div[data-testid="post-container"]',
+ '.Post',
+ '[data-test-id="post-content"]',
+
+ // Old Reddit design
+ '.thing[data-author]',
+
+ // Mobile Reddit
+ 'article[data-testid="post"]',
+
+ // Generic fallbacks that might work across versions
+ '[data-click-id="body"]',
+ '.scrollerItem'
+ ];
+
+ const postElements = document.querySelectorAll(postSelectors.join(', '));
+
+ if (postElements.length > 0) {
+ debugLog(`Found ${postElements.length} Reddit posts to observe`);
+ postElements.forEach(post => observer.observe(post));
+ }
+ }
+
+ // Initial find
+ findAndObservePosts();
+
+ // Set up mutation observer to detect new posts loaded during scrolling
+ const mutationObserver = new MutationObserver((mutations) => {
+ let shouldFindPosts = false;
+
+ for (const mutation of mutations) {
+ if (mutation.addedNodes.length > 0) {
+ shouldFindPosts = true;
+ break;
+ }
+ }
+
+ if (shouldFindPosts) {
+ // Wait a small amount of time for any post loading to complete
+ // This helps reduce redundant processing during rapid DOM changes
+ clearTimeout(state.mutationTimeout);
+ state.mutationTimeout = setTimeout(findAndObservePosts, CONFIG.MUTATION_OBSERVER_DELAY);
+ }
+ });
+
+ // Observe changes to the body and any feed containers
+ const feedContainers = [
+ document.body,
+ ...document.querySelectorAll('.ListingLayout-outerContainer, .browse-container, #siteTable')
+ ];
+
+ feedContainers.forEach(container => {
+ if (container) {
+ mutationObserver.observe(container, { childList: true, subtree: true });
+ }
+ });
+
+ return {
+ disconnect: () => {
+ observer.disconnect();
+ mutationObserver.disconnect();
+ debugLog('Observers disconnected');
+ }
+ };
+}
+
+/**
+ * Initialize the content script
+ */
+function initialize() {
+ if (state.isInitialized) return;
+
+ // Only run on Reddit domains
+ if (!window.location.hostname.includes('reddit.com')) {
+ return;
+ }
+
+ debugLog('Reddit page detected, checking if capture is enabled');
+
+ // Check if capture is enabled in the extension settings
+ chrome.runtime.sendMessage({ type: 'getEnableStatus' }, function(response) {
+ if (response && response.enableScrollCapture) {
+ debugLog('Reddit capture enabled, setting up viewport detection');
+ state.isEnabled = true;
+ state.observers = setupObserver();
+ } else {
+ debugLog('Reddit capture is disabled in settings');
+ state.isEnabled = false;
+ }
+
+ state.isInitialized = true;
+ });
+
+ // Listen for status changes
+ chrome.runtime.onMessage.addListener((message) => {
+ if (message.type === 'captureStatusChanged') {
+ if (message.enabled && !state.isEnabled) {
+ // Capture was enabled
+ debugLog('Capture was enabled, setting up observers');
+ state.isEnabled = true;
+ state.observers = setupObserver();
+ } else if (!message.enabled && state.isEnabled) {
+ // Capture was disabled
+ debugLog('Capture was disabled, shutting down observers');
+ state.isEnabled = false;
+ if (state.observers) {
+ state.observers.disconnect();
+ state.observers = null;
+ }
+ }
+ }
+ });
+}
+
+// Handle initialization properly
+if (document.readyState === 'loading') {
+ document.addEventListener('DOMContentLoaded', initialize);
+} else {
+ initialize();
+}
diff --git a/reddit-handler.js b/reddit-handler.js
new file mode 100644
index 0000000..620b8c5
--- /dev/null
+++ b/reddit-handler.js
@@ -0,0 +1,593 @@
+// reddit-handler.js
+// Manages all Reddit-specific capture functionality
+
+// Configuration for the background-side Reddit handler
+const CONFIG = {
+ CAPTURE_DELAY: 1000, // Delay between captures in ms (used after a normal-priority item)
+ VIEWPORT_CAPTURE_DELAY: 100, // Quicker for visible posts (used after a high-priority item)
+ MAX_PROCESSED_URLS: 1000, // Maximum number of URLs to keep in memory
+ DEBUG_MODE: true, // Enable debug logging via debugLog()
+ BATCH_SIZE: 10, // Number of entries to batch save
+ STORAGE_KEY: 'reddit_processed_urls' // chrome.storage.local key for storing processed URLs
+};
+
+// State management
+// Normalized URLs that have already been processed; used to skip duplicates
+let processedUrls = new Set();
+// Number of queue items processed since initialize() or reset()
+let captureCount = 0;
+// Set once initialize() has completed
+let isInitialized = false;
+
+// Queues with priority; processQueue() drains `high` before `normal`
+const captureQueue = {
+ high: [], // Viewport-visible posts
+ normal: [], // Background discovered posts
+ processing: false // True while processQueue() has work scheduled
+};
+
+/**
+ * Debug logging
+ */
+function debugLog(...args) {
+ if (CONFIG.DEBUG_MODE) {
+ console.log('[Reddit Handler]', ...args);
+ }
+}
+
+/**
+ * Initialize the Reddit handler
+ */
+export async function initialize() {
+ if (isInitialized) return;
+
+ debugLog('Initializing Reddit handler');
+
+ // Load previously processed URLs from storage
+ const storage = await chrome.storage.local.get(CONFIG.STORAGE_KEY);
+ if (storage[CONFIG.STORAGE_KEY]) {
+ try {
+ const storedUrls = JSON.parse(storage[CONFIG.STORAGE_KEY]);
+ processedUrls = new Set(storedUrls);
+ debugLog(`Loaded ${processedUrls.size} previously processed URLs`);
+ } catch (e) {
+ debugLog('Error parsing stored URLs:', e);
+ processedUrls = new Set();
+ }
+ }
+
+ // Reset capture count
+ captureCount = 0;
+
+ // Setup listeners
+ setupRedditListeners();
+
+ isInitialized = true;
+ debugLog('Reddit handler initialized');
+
+ // Start queue processor
+ processQueue();
+}
+
+/**
+ * Setup listeners for Reddit-specific functionality
+ */
+function setupRedditListeners() {
+ // Listen for navigation to Reddit post pages
+ chrome.webRequest.onCompleted.addListener(
+ handleRedditNavigation,
+ { urls: ["*://*.reddit.com/*"] },
+ []
+ );
+
+ // Listen for POST requests that might contain Reddit data
+ chrome.webRequest.onBeforeRequest.addListener(
+ handleRedditApiRequest,
+ { urls: ["*://*.reddit.com/*"] },
+ ["requestBody"]
+ );
+}
+
+/**
+ * Handle navigation to a Reddit post
+ */
+async function handleRedditNavigation(details) {
+ // Only interested in document navigation
+ if (details.type !== 'main_frame' && details.type !== 'sub_frame') {
+ return;
+ }
+
+ // Check if URL contains Reddit and is a post
+ if (!details.url.includes('reddit.com') || !isRedditPostUrl(details.url)) {
+ return;
+ }
+
+ // Get settings to see if we should capture
+ const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']);
+ if (!enableScrollCapture) {
+ return;
+ }
+
+ debugLog('Detected navigation to Reddit post:', details.url);
+
+ // Inject content script for viewport detection
+ injectContentScript(details.tabId);
+
+ // Wait for page to load title
+ setTimeout(async () => {
+ try {
+ // Get tab info
+ const tab = await chrome.tabs.get(details.tabId).catch(() => null);
+ if (!tab) return;
+
+ // Process the URL
+ processRedditNavigationUrl(details.url, tab.title, details.tabId);
+ } catch (e) {
+ debugLog('Error processing Reddit navigation:', e);
+ }
+ }, 1000);
+}
+
+/**
+ * Handle Reddit API requests that might contain post data
+ */
+async function handleRedditApiRequest(details) {
+ if (details.method !== "POST") return;
+
+ // Check for relevant endpoints
+ const isRedditAPIEndpoint =
+ details.url.includes('/svc/shreddit/events') ||
+ details.url.includes('/svc/shreddit/graphql') ||
+ details.url.includes('/api/');
+
+ if (!isRedditAPIEndpoint) return;
+
+ // Check if capture is enabled
+ const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']);
+ if (!enableScrollCapture) {
+ return;
+ }
+
+ try {
+ // Try to parse the request body if available
+ if (details.requestBody && details.requestBody.raw) {
+ for (const raw of details.requestBody.raw) {
+ if (raw.bytes) {
+ const decoder = new TextDecoder();
+ const text = decoder.decode(raw.bytes);
+
+ // Look for post data patterns
+ if (text.includes('"post":') && text.includes('"title":')) {
+ extractPostsFromJson(text, details.tabId);
+ } else if (text.includes('"subreddit_name":') && text.includes('"title":')) {
+ extractPostsFromJson(text, details.tabId);
+ }
+ }
+ }
+ }
+ } catch (e) {
+ debugLog('Error processing request body:', e);
+ }
+}
+
+/**
+ * Extract posts from JSON data
+ */
+function extractPostsFromJson(jsonText, tabId) {
+ try {
+ // For debugging, log a sample of what we're trying to parse
+ debugLog('Parsing JSON data sample:', jsonText.substring(0, 200));
+
+ // Try to parse the JSON
+ let data = null;
+ try {
+ data = JSON.parse(jsonText);
+ } catch (e) {
+ debugLog('Failed to parse JSON:', e.message);
+ return;
+ }
+
+ // Check for Reddit's specific structure with "info" array
+ if (data && data.info && Array.isArray(data.info)) {
+ debugLog('Found Reddit info array with', data.info.length, 'items');
+
+ // Process each item in the info array
+ data.info.forEach(item => {
+ // Check if this item has a post object
+ if (item && item.post) {
+ // Extract the post data
+ const post = item.post;
+
+ // Check for title field
+ if (post.title) {
+ debugLog('Found post with title:', post.title);
+
+ // Create URL
+ let url = '';
+ if (post.url && post.url.startsWith('/')) {
+ url = 'https://www.reddit.com' + post.url;
+ } else if (post.url) {
+ url = post.url;
+ } else if (post.id && post.id.startsWith('t3_')) {
+ // Construct URL from post ID
+ const postId = post.id.substring(3);
+
+ // Include subreddit if available
+ if (post.subreddit_name) {
+ const subreddit = post.subreddit_name.replace('r/', '');
+ url = `https://www.reddit.com/r/${subreddit}/comments/${postId}`;
+ } else {
+ url = `https://www.reddit.com/comments/${postId}`;
+ }
+ }
+
+ if (url) {
+ // Extract subreddit
+ let subreddit = '';
+ if (post.subreddit_name) {
+ subreddit = post.subreddit_name.replace('r/', '');
+ }
+
+ // Create post data object
+ const postData = {
+ url: url,
+ title: post.title,
+ subreddit: subreddit,
+ timestamp: new Date().toISOString()
+ };
+
+ // Queue the post for processing with normal priority
+ queueForCapture(postData, tabId, 'normal');
+ }
+ }
+ }
+ });
+ }
+ } catch (e) {
+ debugLog('Error processing JSON data:', e);
+ }
+}
+
+/**
+ * Check if URL is a Reddit post
+ */
+function isRedditPostUrl(url) {
+ try {
+ if (!url.includes('reddit.com')) return false;
+
+ const parsedUrl = new URL(url);
+ return parsedUrl.pathname.includes('/comments/');
+ } catch (e) {
+ return false;
+ }
+}
+
+/**
+ * Process a Reddit navigation URL
+ */
+function processRedditNavigationUrl(url, pageTitle, tabId) {
+ try {
+ const parsedUrl = new URL(url);
+ const pathParts = parsedUrl.pathname.split('/');
+
+ // Check for /comments/ format
+ if (pathParts.includes('comments')) {
+ const commentsIndex = pathParts.indexOf('comments');
+
+ // Need at least comment ID
+ if (commentsIndex + 1 < pathParts.length) {
+ // Get subreddit if present
+ let subreddit = '';
+ if (pathParts[1] === 'r' && pathParts[2]) {
+ subreddit = pathParts[2];
+ }
+
+ // Clean up title
+ let title = pageTitle || '';
+ if (title.includes(' - Reddit')) {
+ title = title.split(' - Reddit')[0].trim();
+ }
+
+ // Create post data
+ const postData = {
+ url: url,
+ title: title || 'Reddit Post',
+ subreddit: subreddit,
+ timestamp: new Date().toISOString()
+ };
+
+ // Queue for processing with normal priority
+ queueForCapture(postData, tabId, 'normal');
+ }
+ }
+ } catch (e) {
+ debugLog('Error processing Reddit URL:', e);
+ }
+}
+
+/**
+ * Queue a post for capture with priority
+ */
+function queueForCapture(postData, tabId, priority = 'normal') {
+ if (!postData || !postData.url || !postData.title) {
+ debugLog('Invalid post data, skipping:', postData);
+ return;
+ }
+
+ // Normalize URL to avoid duplicates
+ const normalizedUrl = normalizeRedditUrl(postData.url);
+
+ // Skip if already processed
+ if (processedUrls.has(normalizedUrl)) {
+ debugLog('Skipping already processed URL:', normalizedUrl);
+ return;
+ }
+
+ debugLog(`Queueing Reddit post with ${priority} priority:`, postData.title);
+
+ // Add to appropriate queue
+ captureQueue[priority].push({
+ data: postData,
+ tabId: tabId,
+ queuedAt: Date.now()
+ });
+
+ // Start processing if not already running
+ if (!captureQueue.processing) {
+ processQueue();
+ }
+}
+
+/**
+ * Process the capture queue
+ */
+async function processQueue() {
+ if (captureQueue.high.length === 0 && captureQueue.normal.length === 0) {
+ captureQueue.processing = false;
+ debugLog('Queue empty, stopping processor');
+ return;
+ }
+
+ captureQueue.processing = true;
+
+ // Process high priority queue first
+ let item;
+ let delay;
+
+ if (captureQueue.high.length > 0) {
+ item = captureQueue.high.shift();
+ delay = CONFIG.VIEWPORT_CAPTURE_DELAY;
+ } else {
+ item = captureQueue.normal.shift();
+ delay = CONFIG.CAPTURE_DELAY;
+ }
+
+ // Get age of item in queue
+ const queueAge = Date.now() - item.queuedAt;
+ debugLog(`Processing post from queue (age: ${queueAge}ms):`, item.data.title);
+
+ // Normalize URL for deduplication
+ const normalizedUrl = normalizeRedditUrl(item.data.url);
+
+ // Mark as processed
+ addToProcessedUrls(normalizedUrl);
+ captureCount++;
+
+ // Create entry object
+ const entry = {
+ url: item.data.url,
+ title: item.data.title,
+ timestamp: item.data.timestamp,
+ tags: ['reddit', item.data.subreddit].filter(Boolean)
+ };
+
+ // Process the entry
+ await saveEntry(entry);
+
+ // Show status in tab - check if tab still exists first
+ try {
+ const tab = await chrome.tabs.get(item.tabId);
+ if (tab) {
+ chrome.runtime.sendMessage({
+ type: 'showStatus',
+ message: `${entry.title.substring(0, 40)}...`,
+ count: captureCount,
+ tabId: item.tabId
+ });
+ }
+ } catch (err) {
+ debugLog(`Tab ${item.tabId} doesn't exist anymore, skipping status update`);
+ }
+
+ // Schedule next item with delay
+ setTimeout(processQueue, delay);
+}
+
+/**
+ * Add URL to processed URLs and manage the size limit
+ */
+function addToProcessedUrls(url) {
+ processedUrls.add(url);
+
+ // If we've exceeded the limit, remove oldest items
+ // This is approximate since Sets don't guarantee order
+ if (processedUrls.size > CONFIG.MAX_PROCESSED_URLS) {
+ const urlsArray = Array.from(processedUrls);
+ const toRemove = urlsArray.slice(0, urlsArray.length - CONFIG.MAX_PROCESSED_URLS);
+ toRemove.forEach(u => processedUrls.delete(u));
+ debugLog(`Removed ${toRemove.length} old URLs from processed set`);
+ }
+
+ // Periodically save processed URLs to storage
+ if (processedUrls.size % 50 === 0) {
+ persistProcessedUrls();
+ }
+}
+
+/**
+ * Save processed URLs to storage
+ */
+async function persistProcessedUrls() {
+ const urlsArray = Array.from(processedUrls);
+ await chrome.storage.local.set({
+ [CONFIG.STORAGE_KEY]: JSON.stringify(urlsArray)
+ });
+ debugLog(`Saved ${urlsArray.length} processed URLs to storage`);
+}
+
+/**
+ * Normalize Reddit URL to avoid duplicates
+ */
+function normalizeRedditUrl(url) {
+ try {
+ const parsedUrl = new URL(url);
+
+ // Extract essential parts (subreddit & post ID)
+ const parts = parsedUrl.pathname.split('/');
+ const commentsIndex = parts.indexOf('comments');
+
+ if (commentsIndex > 0 && commentsIndex + 1 < parts.length) {
+ // Get post ID
+ const postId = parts[commentsIndex + 1];
+
+ // Get subreddit if available
+ let subreddit = '';
+ if (parts[1] === 'r' && parts[2]) {
+ subreddit = parts[2];
+ }
+
+ // Create canonical URL
+ if (subreddit) {
+ return `${parsedUrl.origin}/r/${subreddit}/comments/${postId}`;
+ } else {
+ return `${parsedUrl.origin}/comments/${postId}`;
+ }
+ }
+
+ // Fallback to removing query params and fragments
+ return `${parsedUrl.origin}${parsedUrl.pathname}`;
+ } catch (e) {
+ debugLog('Error normalizing URL:', e);
+ return url;
+ }
+}
+
+/**
+ * Save entry to local storage
+ * Eventually used for batch saving
+ */
+async function saveEntry(entry) {
+ try {
+ // Add custom tags if configured
+ const { scrollCaptureTags } = await chrome.storage.local.get(['scrollCaptureTags']);
+ const customTags = scrollCaptureTags ?
+ scrollCaptureTags.split(',').map(tag => tag.trim()) : [];
+
+ // Create the full entry object
+ const fullEntry = {
+ id: crypto.randomUUID(),
+ url: entry.url,
+ timestamp: entry.timestamp || new Date().toISOString(),
+ tags: ['auto-captured', 'reddit', ...customTags, ...(entry.tags || [])],
+ title: entry.title || 'Reddit Post',
+ notes: `Auto-captured from Reddit: ${entry.url}`
+ };
+
+ // Save to storage
+ const { entries = [] } = await chrome.storage.local.get('entries');
+
+ // Normalize URLs for more accurate comparison
+ const normalizeUrl = (url) => {
+ try {
+ const normalized = new URL(url);
+ return normalized.origin + normalized.pathname.replace(/\/$/, '');
+ } catch (e) {
+ return url;
+ }
+ };
+
+ // Check if this URL already exists in our entries
+ const normalizedEntryUrl = normalizeUrl(entry.url);
+ const existingEntry = entries.find(e => normalizeUrl(e.url) === normalizedEntryUrl);
+
+ if (!existingEntry) {
+ entries.push(fullEntry);
+ await chrome.storage.local.set({ entries });
+ debugLog('Entry saved to local storage:', fullEntry.title);
+ } else {
+ debugLog('URL already exists in entries, skipping:', entry.url);
+ }
+ } catch (e) {
+ debugLog('Error saving entry:', e);
+ }
+}
+
+/**
+ * Inject content script for viewport detection
+ */
+export async function injectContentScript(tabId) {
+ try {
+ const { enableScrollCapture } = await chrome.storage.local.get(['enableScrollCapture']);
+ if (!enableScrollCapture) {
+ debugLog('Reddit capture is disabled in settings, not injecting content script');
+ return;
+ }
+
+ debugLog('Injecting Reddit content script into tab:', tabId);
+
+ await chrome.scripting.executeScript({
+ target: { tabId: tabId },
+ files: ['reddit-content.js']
+ });
+
+ debugLog('Content script injected successfully');
+ } catch (err) {
+ debugLog('Error injecting content script:', err.message);
+ }
+}
+
+/**
+ * Handle high priority capture request from content script
+ */
+export function captureHighPriority(entry, tabId) {
+ debugLog('Received high priority capture request from content script:', entry.url);
+
+ // Create post data object
+ const postData = {
+ url: entry.url,
+ title: entry.title,
+ subreddit: entry.tags.find(tag => tag !== 'reddit' && tag !== 'viewport-captured'),
+ timestamp: entry.timestamp
+ };
+
+ // Queue with high priority
+ queueForCapture(postData, tabId, 'high');
+}
+
+/**
+ * Clear all queues and reset
+ */
+export function reset() {
+ captureQueue.high = [];
+ captureQueue.normal = [];
+ captureQueue.processing = false;
+ captureCount = 0;
+ debugLog('Reddit handler reset');
+}
+
+/**
+ * Public method to check if we should capture the current URL
+ */
+export function shouldCaptureUrl(url) {
+ if (!url.includes('reddit.com')) return false;
+ return isRedditPostUrl(url);
+}
+
+/**
+ * Get stats about the Reddit handler
+ */
+export function getStats() {
+ return {
+ captureCount,
+ processedUrlsCount: processedUrls.size,
+ highPriorityQueueLength: captureQueue.high.length,
+ normalPriorityQueueLength: captureQueue.normal.length
+ };
+}
diff --git a/site-handlers.js b/site-handlers.js
new file mode 100644
index 0000000..4385ebd
--- /dev/null
+++ b/site-handlers.js
@@ -0,0 +1,254 @@
+// site-handlers.js
+// Registry for all site-specific handlers
+
+import * as RedditHandler from './reddit-handler.js';
+
+
+// Debug switch for this module; set to false to silence debugLog()
+const DEBUG = true;
+
+// Debug logging: forwards its arguments to console.log behind a
+// '[Site Handlers]' prefix, but only while DEBUG is on
+function debugLog(...messageParts) {
+  if (!DEBUG) return;
+  console.log('[Site Handlers]', ...messageParts);
+}
+
+// Registry of all available site handlers, keyed by handler id
+const handlers = {
+ // Reddit: `module` is the imported handler, `domains` is what findHandlerForUrl matches
+ reddit: {
+ name: 'Reddit',
+ module: RedditHandler,
+ domains: ['reddit.com'],
+ description: 'Automatically captures Reddit posts while browsing',
+ version: '1.0.0',
+ author: 'ArchiveBox'
+ }
+
+ // Add more site handlers here following the same format
+ // (add a comma after the entry above first). For example:
+ /*
+ twitter: {
+ name: 'Twitter',
+ module: TwitterHandler,
+ domains: ['twitter.com', 'x.com'],
+ description: 'Captures tweets and threads',
+ version: '1.0.0',
+ author: 'ArchiveBox'
+ }
+ */
+};
+
+/**
+ * Initialize every registered site handler, one after another.
+ * Does nothing when the `enableScrollCapture` setting is falsy. A handler
+ * whose initialize() throws is reported via console.error; the loop goes on.
+ * @returns {Promise<void>}
+ */
+export async function initializeAll() {
+  debugLog('Initializing all site handlers');
+
+  const settings = await chrome.storage.local.get('enableScrollCapture');
+  if (!settings.enableScrollCapture) {
+    debugLog('Site capture is disabled, skipping initialization');
+    return;
+  }
+
+  // Registrations without an initialize() function are skipped
+  for (const { name, module } of Object.values(handlers)) {
+    if (!module || typeof module.initialize !== 'function') continue;
+    try {
+      debugLog(`Initializing ${name} handler`);
+      await module.initialize();
+    } catch (initError) {
+      console.error(`Error initializing ${name} handler:`, initError);
+    }
+  }
+
+  debugLog('All site handlers initialized');
+}
+
+/** Look up a handler module by its registry id. */
+export function getHandler(handlerId) {
+  // Unknown ids have no registration, so the result is undefined
+  const registration = handlers[handlerId];
+  return registration?.module;
+}
+
+/**
+ * Find the handler registered for a URL. A registered domain matches its own
+ * hostname and its subdomains ('old.reddit.com' yes, 'notreddit.com' no).
+ * @param {string} url - URL to look up.
+ * @returns {{id: string, handler: Object}|null} null if unmatched or unparsable.
+ */
+export function findHandlerForUrl(url) {
+  try {
+    const hostname = new URL(url).hostname.toLowerCase();
+    const servesHost = (domain) => hostname === domain || hostname.endsWith(`.${domain}`);
+    for (const [id, { domains, module }] of Object.entries(handlers)) {
+      if (domains.some(servesHost)) return { id, handler: module };
+    }
+  } catch (error) {
+    console.error('Error finding handler for URL:', error);
+  }
+  return null;
+}
+
+/**
+ * Route a capture request from a content script to the handler for its URL;
+ * falls back to saveGenericEntry() when no handler capture function applies.
+ * An entry without a URL goes to saveGenericEntry() too, rather than throwing.
+ */
+export async function handleCaptureRequest(entry, tabId) {
+  if (!entry || !entry.url) return saveGenericEntry(entry);
+  const match = findHandlerForUrl(entry.url);
+  if (match) {
+    debugLog(`Using ${match.id} handler for ${entry.url}`);
+    if (entry.priority === 'high' && typeof match.handler.captureHighPriority === 'function') {
+      return match.handler.captureHighPriority(entry, tabId);
+    } else if (typeof match.handler.captureNormal === 'function') {
+      return match.handler.captureNormal(entry, tabId);
+    }
+  }
+  // No usable handler capture function, use generic method
+  debugLog(`No specific handler for ${entry.url}, using generic method`);
+  return saveGenericEntry(entry);
+}
+
+/**
+ * Save an entry through the generic (non site-specific) path: append it to
+ * the `entries` array in chrome.storage.local unless its normalized URL
+ * (origin + path, trailing slash removed) is already stored.
+ * @param {Object} entry - Needs `url`; id, timestamp, title and notes get defaults.
+ * @returns {Promise<{success: boolean, reason?: string}>} Failures are returned, not thrown.
+ */
+async function saveGenericEntry(entry) {
+  try {
+    if (!entry || !entry.url) {
+      return { success: false, reason: 'Invalid entry' };
+    }
+
+    // One storage read covers both the saved entries and the tag setting
+    const { entries = [], scrollCaptureTags } =
+      await chrome.storage.local.get(['entries', 'scrollCaptureTags']);
+
+    // Duplicate check ignores query string, fragment and a trailing slash;
+    // URLs that fail to parse are compared verbatim
+    const normalizeUrl = (url) => {
+      try {
+        const parsed = new URL(url);
+        return parsed.origin + parsed.pathname.replace(/\/$/, '');
+      } catch (e) {
+        return url;
+      }
+    };
+
+    const normalizedEntryUrl = normalizeUrl(entry.url);
+    if (entries.some(stored => normalizeUrl(stored.url) === normalizedEntryUrl)) {
+      return { success: false, reason: 'URL already exists' };
+    }
+
+    // Custom tags are a comma-separated setting; blank items (for example
+    // after a trailing comma) are dropped instead of becoming empty tags
+    const customTags = (scrollCaptureTags || '')
+      .split(',').map(tag => tag.trim()).filter(Boolean);
+
+    // Set removes repeated tags while keeping first-seen order
+    const tags = [...new Set([
+      'auto-captured', ...getSiteTags(entry.url), ...customTags, ...(entry.tags || [])
+    ])];
+
+    entries.push({
+      id: entry.id || crypto.randomUUID(),
+      url: entry.url,
+      timestamp: entry.timestamp || new Date().toISOString(),
+      tags,
+      title: entry.title || 'Captured content',
+      notes: entry.notes || `Auto-captured content: ${entry.url}`,
+      favicon: entry.favicon
+    });
+    await chrome.storage.local.set({ entries });
+
+    return { success: true };
+  } catch (e) {
+    return { success: false, reason: e.message };
+  }
+}
+
+/**
+ * Derive a site tag from a URL: its hostname minus a leading 'www.' and a
+ * trailing .com/.org/.net/.io/.gov/.edu. Unparsable URLs give no tags.
+ */
+function getSiteTags(url) {
+  try {
+    const { hostname } = new URL(url);
+    // Anchored so only a real 'www.' prefix goes ('awww.example.com' is kept whole)
+    const domain = hostname.replace(/^www\./, '').replace(/\.(com|org|net|io|gov|edu)$/, '');
+    return [domain];
+  } catch (e) {
+    return [];
+  }
+}
+
+/**
+ * Ask the handler responsible for a URL whether that URL should be captured.
+ * Returns false when no handler matches, the handler lacks shouldCaptureUrl,
+ * or the check throws (the error is logged with console.error).
+ */
+export function shouldCaptureUrl(url) {
+  try {
+    const match = findHandlerForUrl(url);
+    if (match?.handler.shouldCaptureUrl) {
+      return match.handler.shouldCaptureUrl(url);
+    }
+  } catch (checkError) {
+    console.error('Error checking if URL should be captured:', checkError);
+  }
+  return false;
+}
+
+/**
+ * Inject the content script of whichever handler serves the given URL.
+ * @returns {Promise<boolean>} true once the handler's injectContentScript()
+ *   has resolved; false when no handler provides one or an error was logged.
+ */
+export async function injectContentScriptForUrl(url, tabId) {
+  try {
+    const match = findHandlerForUrl(url);
+    if (!match || !match.handler.injectContentScript) return false;
+
+    await match.handler.injectContentScript(tabId);
+    return true;
+  } catch (injectError) {
+    console.error('Error injecting content script:', injectError);
+    return false;
+  }
+}
+
+/**
+ * Get stats from all handlers.
+ * Calls getStats() on every registered handler module that defines it;
+ * handlers without a getStats function are left out of the result.
+ * @returns {Object} Stats objects keyed by handler id.
+ */
+export function getAllStats() {
+  return Object.entries(handlers).reduce((collected, [id, { module }]) => {
+    if (typeof module?.getStats === 'function') {
+      collected[id] = module.getStats();
+    }
+    return collected;
+  }, {});
+}
+
+/**
+ * Get all handlers
+ * Returns the registry object itself rather than a copy, metadata included
+ * @returns {Object} Object containing all registered handlers with their metadata
+ */
+export function getAllHandlers() {
+ return handlers;
+}
+
+// Re-export the Reddit handler module for direct access
+export const Reddit = RedditHandler;
diff --git a/utils.js b/utils.js
index e7dcc35..960ecdd 100755
--- a/utils.js
+++ b/utils.js
@@ -10,6 +10,20 @@ export async function getArchiveBoxServerUrl() {
export function filterEntries(entries, filterText) {
if (!filterText) return entries;
+ // Handle site: prefix
+ if (filterText.toLowerCase().startsWith('site:')) {
+ const siteId = filterText.slice(5).toLowerCase().trim();
+ const handlers = getAllHandlers();
+ const handler = handlers[siteId];
+
+ if (handler) {
+ return entries.filter(entry =>
+ handler.domains.some(domain => entry.url.includes(domain))
+ );
+ }
+ }
+
+ // Regular search
const searchTerms = filterText.toLowerCase().split(' ');
return entries.filter(entry => {
const searchableText = [
@@ -207,3 +221,124 @@ export async function syncToArchiveBox(entry) {
};
}
}
+
+/**
+ * Check if a URL should be captured automatically, using the `match_urls`
+ * and `exclude_urls` regex patterns kept in chrome.storage.local.
+ * @param {string} url - The URL to check
+ * @returns {Promise<boolean>} true only if the URL matches `match_urls` and
+ *   not `exclude_urls`; false for an empty URL, a missing match pattern, or
+ *   any error (for instance an invalid pattern), which is logged.
+ */
+export async function shouldAutoCapture(url) {
+  if (!url) return false;
+
+  try {
+    const stored = await chrome.storage.local.get(['match_urls', 'exclude_urls']);
+    const { match_urls: includeSource, exclude_urls: excludeSource } = stored;
+
+    // If no match pattern is defined, don't capture
+    if (!includeSource) return false;
+
+    const includeRe = new RegExp(includeSource);
+    const excludeRe = excludeSource ? new RegExp(excludeSource) : null;
+
+    // Included URLs are captured unless the exclusion pattern also matches
+    if (!includeRe.test(url)) return false;
+    return excludeRe === null || !excludeRe.test(url);
+  } catch (e) {
+    console.error('Error checking if URL should be captured:', e);
+    return false;
+  }
+}
+
+/**
+ * @returns {Promise} Reply to a 'getSiteHandlers' runtime message, or [] if sending fails
+ */
+export async function getAvailableSiteHandlers() {
+  const request = { type: 'getSiteHandlers' };
+  try {
+    return await chrome.runtime.sendMessage(request);
+  } catch (sendError) {
+    console.error('Error getting site handlers:', sendError);
+    return [];
+  }
+}
+
+/**
+ * Get capture statistics
+ * @returns {Promise