diff --git a/docs/examples/file_download_stream.ts b/docs/examples/file_download_stream.ts index 8931ecc2c764..fa3d3d59ad9d 100644 --- a/docs/examples/file_download_stream.ts +++ b/docs/examples/file_download_stream.ts @@ -1,9 +1,9 @@ import { pipeline, Transform } from 'stream'; -import { FileDownload, type Log } from 'crawlee'; +import { FileDownload, type CrawleeLogger } from 'crawlee'; // A sample Transform stream logging the download progress. -function createProgressTracker({ url, log, totalBytes }: { url: URL; log: Log; totalBytes: number }) { +function createProgressTracker({ url, log, totalBytes }: { url: URL; log: CrawleeLogger; totalBytes: number }) { let downloadedBytes = 0; return new Transform({ diff --git a/packages/basic-crawler/src/internals/basic-crawler.ts b/packages/basic-crawler/src/internals/basic-crawler.ts index a04470688569..e5cc0faaee5e 100644 --- a/packages/basic-crawler/src/internals/basic-crawler.ts +++ b/packages/basic-crawler/src/internals/basic-crawler.ts @@ -6,6 +6,7 @@ import type { AddRequestsBatchedResult, AutoscaledPoolOptions, Configuration, + CrawleeLogger, CrawlingContext, DatasetExportOptions, EnqueueLinksOptions, @@ -40,6 +41,7 @@ import { EnqueueStrategy, EventType, KeyValueStore, + LogLevel, mergeCookies, NonRetryableError, purgeDefaultStorages, @@ -77,8 +79,6 @@ import { getDomain } from 'tldts'; import type { ReadonlyDeep, SetRequired } from 'type-fest'; import { LruCache } from '@apify/datastructures'; -import type { Log } from '@apify/log'; -import defaultLog, { LogLevel } from '@apify/log'; import { addTimeoutToPromise, TimeoutError, tryCancel } from '@apify/timeout'; import { cryptoRandomObjectId } from '@apify/utilities'; @@ -370,9 +370,6 @@ export interface BasicCrawlerOptions< */ onSkippedRequest?: SkippedRequestCallback; - /** @internal */ - log?: Log; - /** * Enables experimental features of Crawlee, which can alter the behavior of the crawler. * WARNING: these options are not guaranteed to be stable and may change or be removed at any time. @@ -415,6 +412,12 @@ export interface BasicCrawlerOptions< */ eventManager?: EventManager; + /** + * Custom logger to use for this crawler. + * If provided, the crawler will use its own ServiceLocator instance instead of the global one. + */ + logger?: CrawleeLogger; + /** * A unique identifier for the crawler instance. This ID is used to isolate the state returned by * {@apilink BasicCrawler.useState|`crawler.useState()`} from other crawler instances. @@ -586,7 +589,12 @@ export class BasicCrawler< running = false; hasFinishedBefore = false; - readonly log: Log; + #log!: CrawleeLogger; + + get log(): CrawleeLogger { + return this.#log; + } + protected requestHandler!: RequestHandler; protected errorHandler?: ErrorHandler; protected failedRequestHandler?: ErrorHandler; @@ -651,6 +659,7 @@ export class BasicCrawler< configuration: ow.optional.object, storageClient: ow.optional.object, eventManager: ow.optional.object, + logger: ow.optional.object, // AutoscaledPool shorthands minConcurrency: ow.optional.number, @@ -659,7 +668,6 @@ export class BasicCrawler< keepAlive: ow.optional.boolean, // internal - log: ow.optional.object, experiments: ow.optional.object, statisticsOptions: ow.optional.object, @@ -695,6 +703,7 @@ export class BasicCrawler< configuration, storageClient, eventManager, + logger, // AutoscaledPool shorthands minConcurrency, @@ -714,7 +723,6 @@ export class BasicCrawler< httpClient, // internal - log = defaultLog.child({ prefix: this.constructor.name }), experiments = {}, id, @@ -730,15 +738,18 @@ export class BasicCrawler< if ( storageClient || eventManager || + logger || (configuration !== undefined && configuration !== serviceLocator.getConfiguration()) ) { - const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient); + const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient, logger); serviceLocatorScope = bindMethodsToServiceLocator(scopedServiceLocator, this); } try { serviceLocatorScope.enterScope(); + this.#log = serviceLocator.getLogger().child({ prefix: this.constructor.name }); + // Store whether the user explicitly provided an ID this.hasExplicitId = id !== undefined; // Store the user-provided ID, or generate a unique one for tracking purposes (not for state key) @@ -793,7 +804,6 @@ export class BasicCrawler< this.httpClient = httpClient ?? new GotScrapingHttpClient(); this.proxyConfiguration = proxyConfiguration; - this.log = log; this.statusMessageLoggingInterval = statusMessageLoggingInterval; this.statusMessageCallback = statusMessageCallback as StatusMessageCallback; this.domainAccessedTime = new Map(); @@ -833,19 +843,19 @@ export class BasicCrawler< this.sameDomainDelayMillis = sameDomainDelaySecs * 1000; this.maxSessionRotations = maxSessionRotations; this.stats = new Statistics({ - logMessage: `${log.getOptions().prefix} request statistics:`, - log, + logMessage: `${this.constructor.name} request statistics:`, + log: this.log, ...(this.hasExplicitId ? { id: this.crawlerId } : {}), ...statisticsOptions, }); this.sessionPoolOptions = { ...sessionPoolOptions, - log, + log: this.log, }; if (this.retryOnBlocked) { this.sessionPoolOptions.blockedStatusCodes = sessionPoolOptions.blockedStatusCodes ?? []; if (this.sessionPoolOptions.blockedStatusCodes.length !== 0) { - log.warning( + this.log.warning( `Both 'blockedStatusCodes' and 'retryOnBlocked' are set. Please note that the 'retryOnBlocked' feature might not work as expected.`, ); } @@ -854,7 +864,7 @@ export class BasicCrawler< const maxSignedInteger = 2 ** 31 - 1; if (this.requestHandlerTimeoutMillis > maxSignedInteger) { - log.warning( + this.log.warning( `requestHandlerTimeoutMillis ${this.requestHandlerTimeoutMillis}` + ` does not fit a signed 32-bit integer. Limiting the value to ${maxSignedInteger}`, ); @@ -885,7 +895,7 @@ export class BasicCrawler< isTaskReadyFunction: async () => { if (isMaxPagesExceeded()) { if (this.shouldLogMaxProcessedRequestsExceeded) { - log.info( + this.log.info( 'Crawler reached the maxRequestsPerCrawl limit of ' + `${this.maxRequestsPerCrawl} requests and will shut down soon. Requests that are in progress will be allowed to finish.`, ); @@ -898,7 +908,7 @@ export class BasicCrawler< }, isFinishedFunction: async () => { if (isMaxPagesExceeded()) { - log.info( + this.log.info( `Earlier, the crawler reached the maxRequestsPerCrawl limit of ${this.maxRequestsPerCrawl} requests ` + 'and all requests that were in progress at that time have now finished. ' + `In total, the crawler processed ${this.handledRequestsCount} requests and will shut down.`, @@ -914,12 +924,12 @@ export class BasicCrawler< const reason = isFinishedFunction ? "Crawler's custom isFinishedFunction() returned true, the crawler will shut down." : 'All requests from the queue have been processed, the crawler will shut down.'; - log.info(reason); + this.log.info(reason); } return isFinished; }, - log, + log: this.log, }; this.autoscaledPoolOptions = { ...autoscaledPoolOptions, ...basicCrawlerAutoscaledPoolConfiguration }; @@ -944,7 +954,7 @@ export class BasicCrawler< async setStatusMessage(message: string, options: SetStatusMessageOptions = {}) { const data = options.isStatusMessageTerminal != null ? { terminal: options.isStatusMessageTerminal } : undefined; - this.log.internal(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data); + this.log.logWithLevel(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data); const client = serviceLocator.getStorageClient(); @@ -1099,7 +1109,7 @@ export class BasicCrawler< retryHistogram: this.stats.requestRetryHistogram, ...finalStats, }; - this.log.info('Final request statistics:', stats); + this.log.info('Final request statistics:', stats as unknown as Record); if (this.stats.errorTracker.total !== 0) { const prettify = ([count, info]: [number, string[]]) => @@ -1193,12 +1203,14 @@ export class BasicCrawler< BasicCrawler.useStateCrawlerIds.add(this.crawlerId); if (BasicCrawler.useStateCrawlerIds.size > 1) { - defaultLog.warningOnce( - 'Multiple crawler instances are calling useState() without an explicit `id` option. \n' + - 'This means they will share the same state object, which is likely unintended. \n' + - 'To fix this, provide a unique `id` option to each crawler instance. \n' + - 'Example: new BasicCrawler({ id: "my-crawler-1", ... })', - ); + serviceLocator + .getLogger() + .warningOnce( + 'Multiple crawler instances are calling useState() without an explicit `id` option. \n' + + 'This means they will share the same state object, which is likely unintended. \n' + + 'To fix this, provide a unique `id` option to each crawler instance. \n' + + 'Example: new BasicCrawler({ id: "my-crawler-1", ... })', + ); } return kvs.getAutoSavedValue(BasicCrawler.CRAWLEE_STATE_KEY, defaultValue); diff --git a/packages/browser-pool/src/abstract-classes/browser-controller.ts b/packages/browser-pool/src/abstract-classes/browser-controller.ts index 0c546488ed7f..078fcd889a99 100644 --- a/packages/browser-pool/src/abstract-classes/browser-controller.ts +++ b/packages/browser-pool/src/abstract-classes/browser-controller.ts @@ -1,3 +1,4 @@ +import { type CrawleeLogger, serviceLocator } from '@crawlee/core'; import type { Cookie, Dictionary } from '@crawlee/types'; import { nanoid } from 'nanoid'; import { TypedEmitter } from 'tiny-typed-emitter'; @@ -6,7 +7,6 @@ import { tryCancel } from '@apify/timeout'; import { BROWSER_CONTROLLER_EVENTS } from '../events.js'; import type { LaunchContext } from '../launch-context.js'; -import { log } from '../logger.js'; import type { UnwrapPromise } from '../utils.js'; import type { BrowserPlugin, CommonBrowser, CommonLibrary } from './browser-plugin.js'; @@ -40,6 +40,7 @@ export abstract class BrowserController< NewPageResult = UnwrapPromise>, > extends TypedEmitter> { id = nanoid(); + protected log!: CrawleeLogger; /** * The `BrowserPlugin` instance used to launch the browser. @@ -90,6 +91,7 @@ export abstract class BrowserController< constructor(browserPlugin: BrowserPlugin) { super(); + this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' }); this.browserPlugin = browserPlugin; } @@ -136,14 +138,14 @@ export abstract class BrowserController< // TODO: shouldn't this go in a finally instead? this.isActive = false; } catch (error) { - log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id }); + this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id }); } this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this); setTimeout(() => { this._kill().catch((err) => { - log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id }); + this.log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id }); }); }, PROCESS_KILL_TIMEOUT_MILLIS); } diff --git a/packages/browser-pool/src/abstract-classes/browser-plugin.ts b/packages/browser-pool/src/abstract-classes/browser-plugin.ts index 3eb69e011a99..144f4368a042 100644 --- a/packages/browser-pool/src/abstract-classes/browser-plugin.ts +++ b/packages/browser-pool/src/abstract-classes/browser-plugin.ts @@ -1,4 +1,4 @@ -import { CriticalError } from '@crawlee/core'; +import { type CrawleeLogger, CriticalError, serviceLocator } from '@crawlee/core'; import type { Dictionary } from '@crawlee/types'; import merge from 'lodash.merge'; @@ -105,6 +105,7 @@ export abstract class BrowserPlugin< NewPageResult = UnwrapPromise>, > { name = this.constructor.name; + protected log!: CrawleeLogger; library: Library; launchOptions: LibraryOptions; proxyUrl?: string; @@ -121,6 +122,7 @@ export abstract class BrowserPlugin< browserPerProxy = false, } = options; + this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' }); this.library = library; this.launchOptions = launchOptions; this.proxyUrl = proxyUrl && new URL(proxyUrl).href.slice(0, -1); diff --git a/packages/browser-pool/src/browser-pool.ts b/packages/browser-pool/src/browser-pool.ts index 6da577a48037..6b1e16ad2d6e 100644 --- a/packages/browser-pool/src/browser-pool.ts +++ b/packages/browser-pool/src/browser-pool.ts @@ -1,4 +1,4 @@ -import type { TieredProxy } from '@crawlee/core'; +import { type CrawleeLogger, serviceLocator, type TieredProxy } from '@crawlee/core'; import type { BrowserFingerprintWithHeaders } from 'fingerprint-generator'; import { FingerprintGenerator } from 'fingerprint-generator'; import { FingerprintInjector } from 'fingerprint-injector'; @@ -20,7 +20,6 @@ import { } from './fingerprinting/hooks.js'; import type { FingerprintGeneratorOptions } from './fingerprinting/types.js'; import type { LaunchContext } from './launch-context.js'; -import { log } from './logger.js'; import type { InferBrowserPluginArray, UnwrapPromise } from './utils.js'; const PAGE_CLOSE_KILL_TIMEOUT_MILLIS = 1000; @@ -334,9 +333,11 @@ export class BrowserPool< private browserRetireInterval?: NodeJS.Timeout; private limiter = pLimit(1); + private log!: CrawleeLogger; constructor(options: Options & BrowserPoolHooks) { super(); + this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' }); this.browserKillerInterval!.unref(); @@ -708,7 +709,7 @@ export class BrowserPool< throw err; } - log.debug('Launched new browser.', { id: browserController.id }); + this.log.debug('Launched new browser.', { id: browserController.id }); browserController.proxyTier = proxyTier; browserController.proxyUrl = proxyUrl; @@ -719,7 +720,7 @@ export class BrowserPool< } catch (err) { this.startingBrowserControllers.delete(browserController); browserController.close().catch((closeErr) => { - log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, { + this.log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, { id: browserController.id, }); }); @@ -774,7 +775,7 @@ export class BrowserPool< if (isBrowserIdle || isBrowserEmpty) { const { id } = controller; - log.debug('Closing retired browser.', { id }); + this.log.debug('Closing retired browser.', { id }); await controller.close(); this.retiredBrowserControllers.delete(controller); closedBrowserIds.push(id); @@ -782,7 +783,7 @@ export class BrowserPool< } if (closedBrowserIds.length) { - log.debug('Closed retired browsers.', { + this.log.debug('Closed retired browsers.', { count: closedBrowserIds.length, closedBrowserIds, }); @@ -798,7 +799,7 @@ export class BrowserPool< await this._executeHooks(this.prePageCloseHooks, page, browserController); await originalPageClose.apply(page, args).catch((err: Error) => { - log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id }); + this.log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id }); }); await this._executeHooks(this.postPageCloseHooks, pageId, browserController); @@ -821,7 +822,7 @@ export class BrowserPool< // Run this with a delay, otherwise page.close() // might fail with "Protocol error (Target.closeTarget): Target closed." setTimeout(() => { - log.debug('Closing retired browser because it has no active pages', { id: browserController.id }); + this.log.debug('Closing retired browser because it has no active pages', { id: browserController.id }); void browserController.close().finally(() => { this.retiredBrowserControllers.delete(browserController); }); diff --git a/packages/browser-pool/src/logger.ts b/packages/browser-pool/src/logger.ts deleted file mode 100644 index c4bba72aa0e5..000000000000 --- a/packages/browser-pool/src/logger.ts +++ /dev/null @@ -1,5 +0,0 @@ -import defaultLog from '@apify/log'; - -export const log = defaultLog.child({ - prefix: 'BrowserPool', -}); diff --git a/packages/browser-pool/src/playwright/playwright-plugin.ts b/packages/browser-pool/src/playwright/playwright-plugin.ts index 6520ecebb6ff..9ae7677c0c88 100644 --- a/packages/browser-pool/src/playwright/playwright-plugin.ts +++ b/packages/browser-pool/src/playwright/playwright-plugin.ts @@ -6,7 +6,6 @@ import { BrowserPlugin } from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { createProxyServerForContainers } from '../container-proxy-server.js'; import type { LaunchContext } from '../launch-context.js'; -import { log } from '../logger.js'; import { getLocalProxyAddress } from '../proxy-server.js'; import type { SafeParameters } from '../utils.js'; import { PlaywrightBrowser as PlaywrightBrowserWithPersistentContext } from './playwright-browser.js'; @@ -82,7 +81,7 @@ export class PlaywrightPlugin extends BrowserPlugin< this._browserVersion = inactiveBrowser.version(); inactiveBrowser.close().catch((error) => { - log.exception(error, 'Failed to close browser.'); + this.log.exception(error, 'Failed to close browser.'); }); } diff --git a/packages/browser-pool/src/puppeteer/puppeteer-controller.ts b/packages/browser-pool/src/puppeteer/puppeteer-controller.ts index 3c654c99ce71..ef81c55c6c86 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-controller.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-controller.ts @@ -6,7 +6,6 @@ import { tryCancel } from '@apify/timeout'; import { BrowserController } from '../abstract-classes/browser-controller.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; -import { log } from '../logger.js'; export interface PuppeteerNewPageOptions extends PuppeteerTypes.BrowserContextOptions { proxyUsername?: string; @@ -88,7 +87,7 @@ export class PuppeteerController extends BrowserController< try { await context.close(); } catch (error: any) { - log.exception(error, 'Failed to close context.'); + this.log.exception(error, 'Failed to close context.'); } finally { await close(); } @@ -120,7 +119,7 @@ export class PuppeteerController extends BrowserController< const browserProcess = this.browser.process(); if (!browserProcess) { - log.debug('Browser was connected using the `puppeteer.connect` method no browser to kill.'); + this.log.debug('Browser was connected using the `puppeteer.connect` method no browser to kill.'); return; } @@ -135,7 +134,7 @@ export class PuppeteerController extends BrowserController< await this.browser.close(); clearTimeout(timeout); } catch (error) { - log.debug('Browser was already killed.', { error }); + this.log.debug('Browser was already killed.', { error }); } } diff --git a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts index b8e3b8edafb3..873a9ec4fcbf 100644 --- a/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts +++ b/packages/browser-pool/src/puppeteer/puppeteer-plugin.ts @@ -7,7 +7,6 @@ import type * as PuppeteerTypes from 'puppeteer'; import { BrowserPlugin } from '../abstract-classes/browser-plugin.js'; import { anonymizeProxySugar } from '../anonymize-proxy.js'; import type { LaunchContext } from '../launch-context.js'; -import { log } from '../logger.js'; import { noop } from '../utils.js'; import type { PuppeteerNewPageOptions } from './puppeteer-controller.js'; import { PuppeteerController } from './puppeteer-controller.js'; @@ -95,12 +94,12 @@ export class PuppeteerPlugin extends BrowserPlugin< if (page) { page.on('error', (error) => { - log.exception(error, 'Page crashed.'); + this.log.exception(error, 'Page crashed.'); page.close().catch(noop); }); } } catch (error: any) { - log.exception(error, 'Failed to retrieve page from target.'); + this.log.exception(error, 'Failed to retrieve page from target.'); } }); diff --git a/packages/core/src/autoscaling/autoscaled_pool.ts b/packages/core/src/autoscaling/autoscaled_pool.ts index bbe777a228aa..a7b2131f7454 100644 --- a/packages/core/src/autoscaling/autoscaled_pool.ts +++ b/packages/core/src/autoscaling/autoscaled_pool.ts @@ -1,12 +1,12 @@ import ow from 'ow'; -import type { Log } from '@apify/log'; import { addTimeoutToPromise } from '@apify/timeout'; import type { BetterIntervalID } from '@apify/utilities'; import { betterClearInterval, betterSetInterval } from '@apify/utilities'; import { CriticalError } from '../errors.js'; -import { log as defaultLog } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; +import { serviceLocator } from '../service_locator.js'; import type { SnapshotterOptions } from './snapshotter.js'; import { Snapshotter } from './snapshotter.js'; import type { SystemInfo, SystemStatusOptions } from './system_status.js'; @@ -125,7 +125,7 @@ export interface AutoscaledPoolOptions { */ maxTasksPerMinute?: number; - log?: Log; + log?: CrawleeLogger; } /** @@ -177,7 +177,7 @@ export interface AutoscaledPoolOptions { * @category Scaling */ export class AutoscaledPool { - private readonly log: Log; + private readonly log: CrawleeLogger; // Configurable properties. private readonly desiredConcurrencyRatio: number; @@ -250,7 +250,7 @@ export class AutoscaledPool { autoscaleIntervalSecs = 10, systemStatusOptions, snapshotterOptions, - log = defaultLog, + log = serviceLocator.getLogger(), maxTasksPerMinute = Infinity, } = options; @@ -488,7 +488,10 @@ export class AutoscaledPool { const currentStatus = this.systemStatus.getCurrentStatus(); const { isSystemIdle } = currentStatus; if (!isSystemIdle && this._currentConcurrency >= this._minConcurrency) { - this.log.perf('Task will not be run. System is overloaded.', currentStatus); + this.log.perf( + 'Task will not be run. System is overloaded.', + currentStatus as unknown as Record, + ); return done(); } // - a task is ready. diff --git a/packages/core/src/autoscaling/snapshotter.ts b/packages/core/src/autoscaling/snapshotter.ts index 7be27f9d0fd2..80470c4d9c1d 100644 --- a/packages/core/src/autoscaling/snapshotter.ts +++ b/packages/core/src/autoscaling/snapshotter.ts @@ -1,12 +1,11 @@ import { getMemoryInfo, isContainerized } from '@crawlee/utils'; import ow from 'ow'; -import type { Log } from '@apify/log'; import type { BetterIntervalID } from '@apify/utilities'; import { betterClearInterval, betterSetInterval } from '@apify/utilities'; import { EventType } from '../events/event_manager.js'; -import { log as defaultLog } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import { serviceLocator } from '../service_locator.js'; import type { SystemInfo } from './system_status.js'; @@ -57,7 +56,7 @@ export interface SnapshotterOptions { snapshotHistorySecs?: number; /** @internal */ - log?: Log; + log?: CrawleeLogger; } interface MemorySnapshot { @@ -108,7 +107,7 @@ interface ClientSnapshot { * @category Scaling */ export class Snapshotter { - log: Log; + log: CrawleeLogger; eventLoopSnapshotIntervalMillis: number; clientSnapshotIntervalMillis: number; snapshotHistoryMillis: number; @@ -151,7 +150,7 @@ export class Snapshotter { maxBlockedMillis = 50, maxUsedMemoryRatio = 0.9, maxClientErrors = 3, - log = defaultLog, + log = serviceLocator.getLogger(), } = options; this.log = log.child({ prefix: 'Snapshotter' }); diff --git a/packages/core/src/configuration.ts b/packages/core/src/configuration.ts index c68ae1765830..61fcf8f9a362 100644 --- a/packages/core/src/configuration.ts +++ b/packages/core/src/configuration.ts @@ -5,8 +5,7 @@ import { join } from 'node:path'; import type { Dictionary } from '@crawlee/types'; import { pathExistsSync } from 'fs-extra/esm'; -import log, { LogLevel } from '@apify/log'; - +import { log, LogLevel } from './log.js'; import { serviceLocator } from './service_locator.js'; import { entries } from './typedefs.js'; diff --git a/packages/core/src/cookie_utils.ts b/packages/core/src/cookie_utils.ts index 6598ec3f30c3..1e05c23b7a32 100644 --- a/packages/core/src/cookie_utils.ts +++ b/packages/core/src/cookie_utils.ts @@ -1,7 +1,7 @@ import type { Cookie as CookieObject } from '@crawlee/types'; import { Cookie, CookieJar } from 'tough-cookie'; -import { log } from './log.js'; +import { serviceLocator } from './service_locator.js'; import { CookieParseError } from './session_pool/errors.js'; export interface ResponseLike { @@ -120,9 +120,11 @@ export function mergeCookies(url: string, sourceCookies: string[]): string { }); if (similarKeyCookie) { - log.warningOnce( - `Found cookies with similar name during cookie merging: '${cookie.key}' and '${similarKeyCookie.key}'`, - ); + serviceLocator + .getLogger() + .warningOnce( + `Found cookies with similar name during cookie merging: '${cookie.key}' and '${similarKeyCookie.key}'`, + ); } jar.setCookieSync(cookie, url); diff --git a/packages/core/src/crawlers/crawler_commons.ts b/packages/core/src/crawlers/crawler_commons.ts index 18c9bab2a37a..ede16bac0d9c 100644 --- a/packages/core/src/crawlers/crawler_commons.ts +++ b/packages/core/src/crawlers/crawler_commons.ts @@ -3,7 +3,7 @@ import type { ReadonlyDeep, SetRequired } from 'type-fest'; import type { Configuration } from '../configuration.js'; import type { EnqueueLinksOptions } from '../enqueue_links/enqueue_links.js'; -import type { Log } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import type { Request, Source } from '../request.js'; import type { Session } from '../session_pool/session.js'; import type { Dataset } from '../storages/dataset.js'; @@ -103,7 +103,7 @@ export interface RestrictedCrawlingContext extends RestrictedCrawlingContext { diff --git a/packages/core/src/crawlers/statistics.ts b/packages/core/src/crawlers/statistics.ts index dfbfb77a93f7..d96d43452873 100644 --- a/packages/core/src/crawlers/statistics.ts +++ b/packages/core/src/crawlers/statistics.ts @@ -1,10 +1,8 @@ import ow from 'ow'; -import type { Log } from '@apify/log'; - import type { EventManager } from '../events/event_manager.js'; import { EventType } from '../events/event_manager.js'; -import { log as defaultLog } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import { serviceLocator } from '../service_locator.js'; import { KeyValueStore } from '../storages/key_value_store.js'; import { ErrorTracker } from './error_tracker.js'; @@ -90,7 +88,7 @@ export class Statistics { private logMessage: string; private listener: () => Promise; private requestsInProgress = new Map(); - private readonly log: Log; + private readonly log: CrawleeLogger; private instanceStart!: number; private logInterval: unknown; private _events?: EventManager; @@ -134,7 +132,7 @@ export class Statistics { this.id = id ?? String(Statistics.id++); this.persistStateKey = `SDK_CRAWLER_STATISTICS_${this.id}`; - this.log = (options.log ?? defaultLog).child({ prefix: 'Statistics' }); + this.log = (options.log ?? serviceLocator.getLogger()).child({ prefix: 'Statistics' }); this.errorTracker = new ErrorTracker({ ...errorTrackerConfig, saveErrorSnapshots }); this.errorTrackerRetry = new ErrorTracker({ ...errorTrackerConfig, saveErrorSnapshots }); this.logIntervalMillis = logIntervalSecs * 1000; @@ -455,7 +453,7 @@ export interface StatisticsOptions { * Parent logger instance, the statistics will create a child logger from this. * @default crawler.log */ - log?: Log; + log?: CrawleeLogger; /** * Key value store instance to persist the statistics. diff --git a/packages/core/src/enqueue_links/enqueue_links.ts b/packages/core/src/enqueue_links/enqueue_links.ts index a0a49e281200..8a913864ce94 100644 --- a/packages/core/src/enqueue_links/enqueue_links.ts +++ b/packages/core/src/enqueue_links/enqueue_links.ts @@ -4,10 +4,9 @@ import ow from 'ow'; import { getDomain } from 'tldts'; import type { SetRequired } from 'type-fest'; -import log from '@apify/log'; - import type { RequestOptions } from '../request.js'; import { Request } from '../request.js'; +import { serviceLocator } from '../service_locator.js'; import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, @@ -351,7 +350,7 @@ export async function enqueueLinks( } if (pseudoUrls?.length) { - log.deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); + serviceLocator.getLogger().deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); urlPatternObjects.push(...constructRegExpObjectsFromPseudoUrls(pseudoUrls)); } diff --git a/packages/core/src/events/event_manager.ts b/packages/core/src/events/event_manager.ts index 399bd519b99b..ffe6b14f50b5 100644 --- a/packages/core/src/events/event_manager.ts +++ b/packages/core/src/events/event_manager.ts @@ -1,9 +1,10 @@ import { AsyncEventEmitter } from '@vladfrangu/async_event_emitter'; -import log from '@apify/log'; import type { BetterIntervalID } from '@apify/utilities'; import { betterClearInterval, betterSetInterval } from '@apify/utilities'; +import { serviceLocator } from '../service_locator.js'; + export interface EventManagerOptions { /** Interval between emitted `persistState` events in milliseconds. */ persistStateIntervalMillis: number; @@ -28,7 +29,7 @@ export abstract class EventManager { protected events = new AsyncEventEmitter(); protected initialized = false; protected intervals: Intervals = {}; - protected log = log.child({ prefix: 'Events' }); + protected log = serviceLocator.getLogger().child({ prefix: 'Events' }); private persistStateIntervalMillis: number; constructor(options: EventManagerOptions) { diff --git a/packages/core/src/events/local_event_manager.ts b/packages/core/src/events/local_event_manager.ts index eb2802d69487..20809384fe75 100644 --- a/packages/core/src/events/local_event_manager.ts +++ b/packages/core/src/events/local_event_manager.ts @@ -1,4 +1,3 @@ -import log from '@apify/log'; import { betterClearInterval, betterSetInterval } from '@apify/utilities'; import type { SystemInfo } from '../autoscaling/system_status.js'; @@ -109,7 +108,7 @@ export class LocalEventManager extends EventManager { memCurrentBytes: memInfo.mainProcessBytes + memInfo.childProcessesBytes, }; } catch (err) { - log.exception(err as Error, 'Memory snapshot failed.'); + this.log.exception(err as Error, 'Memory snapshot failed.'); return {}; } } diff --git a/packages/core/src/log.ts b/packages/core/src/log.ts index 1bdbbd8e7dac..9395df6702df 100644 --- a/packages/core/src/log.ts +++ b/packages/core/src/log.ts @@ -1,5 +1,233 @@ import type { LoggerOptions } from '@apify/log'; import log, { Log, Logger, LoggerJson, LoggerText, LogLevel } from '@apify/log'; +/** + * Configuration options for Crawlee logger implementations. + */ +export interface CrawleeLoggerOptions { + /** Prefix to be prepended to each logged line. */ + prefix?: string | null; +} + +/** + * Interface for Crawlee logger implementations. + * This allows users to inject custom loggers (e.g., Winston, Pino) while maintaining + * compatibility with the default `@apify/log` implementation. + */ +export interface CrawleeLogger { + /** + * Returns the logger configuration. + */ + getOptions(): CrawleeLoggerOptions; + + /** + * Configures logger options. + */ + setOptions(options: Partial): void; + + /** + * Creates a new instance of logger that inherits settings from a parent logger. + */ + child(options: Partial): CrawleeLogger; + + /** + * Logs an `ERROR` message. + */ + error(message: string, data?: Record): void; + + /** + * Logs an `ERROR` level message with a nicely formatted exception. + */ + exception(exception: Error, message: string, data?: Record): void; + + /** + * Logs a `SOFT_FAIL` level message. + */ + softFail(message: string, data?: Record): void; + + /** + * Logs a `WARNING` level message. + */ + warning(message: string, data?: Record): void; + + /** + * Logs a `WARNING` level message only once. + */ + warningOnce(message: string): void; + + /** + * Logs an `INFO` message. + */ + info(message: string, data?: Record): void; + + /** + * Logs a `DEBUG` message. + */ + debug(message: string, data?: Record): void; + + /** + * Logs a `PERF` level message for performance tracking. + */ + perf(message: string, data?: Record): void; + + /** + * Logs given message only once as WARNING for deprecated features. + */ + deprecated(message: string): void; + + /** + * Logs a message at the given level. Useful when the log level is determined dynamically. + */ + logWithLevel(level: number, message: string, data?: Record): void; +} + +/** + * Abstract base class for custom Crawlee logger implementations. + * + * Subclasses must implement two methods: + * - {@apilink BaseCrawleeLogger.logWithLevel} — the core logging dispatch + * - {@apilink BaseCrawleeLogger.createChild} — how to create a child logger instance + * + * All other `CrawleeLogger` methods (`error`, `warning`, `info`, `debug`, etc.) + * are derived automatically. Level filtering is entirely the responsibility of the + * underlying library — `logWithLevel()` is called for every message. + * + * **Example — Winston adapter:** + * ```typescript + * const CRAWLEE_TO_WINSTON = { 1: 'error', 2: 'warn', 3: 'warn', 4: 'info', 5: 'debug', 6: 'debug' }; + * const WINSTON_TO_CRAWLEE = { error: 1, warn: 3, info: 4, debug: 5 }; + * + * class WinstonAdapter extends BaseCrawleeLogger { + * constructor(private logger: winston.Logger, options?: Partial) { + * super(options); + * } + * + * getLevel(): number { + * return WINSTON_TO_CRAWLEE[this.logger.level] ?? LogLevel.INFO; + * } + * + * setLevel(level: number): void { + * this.logger.level = CRAWLEE_TO_WINSTON[level] ?? 'info'; + * } + * + * logWithLevel(level: number, message: string, data?: Record): void { + * this.logger.log(CRAWLEE_TO_WINSTON[level] ?? 'info', message, { ...data, prefix: this.getOptions().prefix }); + * } + * + * protected createChild(options: Partial): CrawleeLogger { + * return new WinstonAdapter(this.logger.child({ prefix: options.prefix }), { ...this.getOptions(), ...options }); + * } + * } + * ``` + */ +export abstract class BaseCrawleeLogger implements CrawleeLogger { + private options: CrawleeLoggerOptions; + private readonly warningsLogged = new Set(); + + constructor(options: Partial = {}) { + this.options = options; + } + + /** + * Core logging method. Subclasses must implement this to dispatch log messages + * to the underlying logger (Winston, Pino, console, etc.). + * + * Level filtering is the responsibility of the underlying library — this method + * is called for every message regardless of the current level. + * + * @param level Crawlee log level (use {@apilink LogLevel} constants) + * @param message The log message + * @param data Optional structured data to attach to the log entry + */ + abstract logWithLevel(level: number, message: string, data?: Record): void; + + /** + * Creates a child logger instance. Subclasses must implement this to define + * how child loggers are created for the underlying logger. + */ + protected abstract createChild(options: Partial): CrawleeLogger; + + getOptions(): CrawleeLoggerOptions { + return this.options; + } + + setOptions(options: Partial): void { + this.options = { ...this.options, ...options }; + } + + child(options: Partial): CrawleeLogger { + return this.createChild(options); + } + + error(message: string, data?: Record): void { + this.logWithLevel(LogLevel.ERROR, message, data); + } + + exception(exception: Error, message: string, data?: Record): void { + this.logWithLevel(LogLevel.ERROR, `${message}: ${exception.message}`, { + ...data, + stack: exception.stack, + exception, + }); + } + + softFail(message: string, data?: Record): void { + this.logWithLevel(LogLevel.SOFT_FAIL, message, data); + } + + warning(message: string, data?: Record): void { + this.logWithLevel(LogLevel.WARNING, message, data); + } + + warningOnce(message: string): void { + if (!this.warningsLogged.has(message)) { + this.warningsLogged.add(message); + this.warning(message); + } + } + + info(message: string, data?: Record): void { + this.logWithLevel(LogLevel.INFO, message, data); + } + + debug(message: string, data?: Record): void { + this.logWithLevel(LogLevel.DEBUG, message, data); + } + + perf(message: string, data?: Record): void { + this.logWithLevel(LogLevel.PERF, `[PERF] ${message}`, data); + } + + deprecated(message: string): void { + this.warningOnce(`[DEPRECATED] ${message}`); + } +} + +/** + * Adapter that wraps `@apify/log`'s {@apilink Log} instance to implement the {@apilink CrawleeLogger} interface. + * + * This is the default logger used by Crawlee when no custom logger is configured. + * Users who want to use a different logging library should implement {@apilink BaseCrawleeLogger} directly. + */ +export class ApifyLogAdapter extends BaseCrawleeLogger { + constructor( + private readonly apifyLog: Log, + options?: Partial, + ) { + super(options ?? {}); + } + + logWithLevel(level: number, message: string, data?: Record): void { + this.apifyLog.internal(level as LogLevel, message, data); + } + + protected createChild(options: Partial): CrawleeLogger { + return new ApifyLogAdapter(this.apifyLog.child({ prefix: options.prefix ?? null }), { + ...this.getOptions(), + ...options, + }); + } +} + export { log, Log, LogLevel, Logger, LoggerJson, LoggerText }; export type { LoggerOptions }; diff --git a/packages/core/src/proxy_configuration.ts b/packages/core/src/proxy_configuration.ts index b16f8de04104..154b59404057 100644 --- a/packages/core/src/proxy_configuration.ts +++ b/packages/core/src/proxy_configuration.ts @@ -1,9 +1,8 @@ import type { Dictionary, ProxyInfo } from '@crawlee/types'; import ow from 'ow'; -import log from '@apify/log'; - import type { Request } from './request.js'; +import { serviceLocator } from './service_locator.js'; export interface ProxyConfigurationFunction { (options?: { request?: Request }): string | null | Promise; @@ -139,7 +138,7 @@ export class ProxyConfiguration { protected tieredProxyUrls?: UrlList[]; protected usedProxyUrls = new Map(); protected newUrlFunction?: ProxyConfigurationFunction; - protected log = log.child({ prefix: 'ProxyConfiguration' }); + protected log = serviceLocator.getLogger().child({ prefix: 'ProxyConfiguration' }); protected domainTiers = new Map(); /** @@ -276,7 +275,7 @@ export class ProxyConfiguration { typeof request.userData.__crawlee.lastProxyTier === 'number' && request.userData.__crawlee.lastProxyTier !== tierPrediction ) { - log.debug( + this.log.debug( `Changing proxy tier for domain "${domain}" from ${request.userData.__crawlee.lastProxyTier} to ${tierPrediction}.`, ); } diff --git a/packages/core/src/recoverable_state.ts b/packages/core/src/recoverable_state.ts index aa67079713a5..cff79a8d0f89 100644 --- a/packages/core/src/recoverable_state.ts +++ b/packages/core/src/recoverable_state.ts @@ -1,9 +1,6 @@ -import type { Configuration } from '@crawlee/core'; +import type { Configuration, CrawleeLogger } from '@crawlee/core'; import { EventType, KeyValueStore, serviceLocator } from '@crawlee/core'; -import type { Log } from '@apify/log'; -import log from '@apify/log'; - export interface RecoverableStatePersistenceOptions { /** * The key under which the state is stored in the KeyValueStore @@ -42,7 +39,7 @@ export interface RecoverableStateOptions> /** * A logger instance for logging operations related to state persistence */ - logger?: Log; + logger?: CrawleeLogger; /** * Configuration instance to use @@ -81,7 +78,7 @@ export class RecoverableState> { private readonly persistStateKvsName?: string; private readonly persistStateKvsId?: string; private keyValueStore: KeyValueStore | null = null; - private readonly log: Log; + private readonly log: CrawleeLogger; private readonly serialize: (state: TStateModel) => string; private readonly deserialize: (serializedState: string) => TStateModel; @@ -96,7 +93,7 @@ export class RecoverableState> { this.persistenceEnabled = options.persistenceEnabled ?? false; this.persistStateKvsName = options.persistStateKvsName; this.persistStateKvsId = options.persistStateKvsId; - this.log = options.logger ?? log.child({ prefix: 'RecoverableState' }); + this.log = options.logger ?? serviceLocator.getLogger().child({ prefix: 'RecoverableState' }); this.serialize = options.serialize ?? JSON.stringify; this.deserialize = options.deserialize ?? JSON.parse; diff --git a/packages/core/src/request.ts b/packages/core/src/request.ts index b25cd1436b01..8869da13120f 100644 --- a/packages/core/src/request.ts +++ b/packages/core/src/request.ts @@ -10,13 +10,11 @@ import { normalizeUrl } from '@apify/utilities'; import type { EnqueueLinksOptions } from './enqueue_links/enqueue_links.js'; import type { SkippedRequestReason } from './enqueue_links/shared.js'; -import { log as defaultLog } from './log.js'; +import { serviceLocator } from './service_locator.js'; import type { AllowedHttpMethods } from './typedefs.js'; import { keys } from './typedefs.js'; // new properties on the Request object breaks serialization -const log = defaultLog.child({ prefix: 'Request' }); - const requestOptionalPredicates = { id: ow.optional.string, loadedUrl: ow.optional.string.url, @@ -442,11 +440,13 @@ class CrawleeRequest { const normalizedUrl = normalizeUrl(url, keepUrlFragment) || url; // It returns null when url is invalid, causing weird errors. if (!useExtendedUniqueKey) { if (normalizedMethod !== 'GET' && payload) { - log.warningOnce( - `We've encountered a ${normalizedMethod} Request with a payload. ` + - 'This is fine. Just letting you know that if your requests point to the same URL ' + - 'and differ only in method and payload, you should see the "useExtendedUniqueKey" option of Request constructor.', - ); + serviceLocator + .getLogger() + .warningOnce( + `We've encountered a ${normalizedMethod} Request with a payload. ` + + 'This is fine. Just letting you know that if your requests point to the same URL ' + + 'and differ only in method and payload, you should see the "useExtendedUniqueKey" option of Request constructor.', + ); } return normalizedUrl; } diff --git a/packages/core/src/service_locator.ts b/packages/core/src/service_locator.ts index d22b1c84d8d5..72d00fc72fcf 100644 --- a/packages/core/src/service_locator.ts +++ b/packages/core/src/service_locator.ts @@ -9,6 +9,8 @@ import { Configuration } from './configuration.js'; import { ServiceConflictError } from './errors.js'; import type { EventManager } from './events/event_manager.js'; import { LocalEventManager } from './events/local_event_manager.js'; +import type { CrawleeLogger } from './log.js'; +import { ApifyLogAdapter } from './log.js'; import type { IStorage, StorageManager } from './storages/storage_manager.js'; import type { Constructor } from './typedefs.js'; @@ -55,6 +57,26 @@ interface ServiceLocatorInterface { */ setStorageClient(storageClient: StorageClient): void; + /** + * Get the logger. + * Returns the default `@apify/log` logger if none has been set. + */ + getLogger(): CrawleeLogger; + + /** + * Set the logger. + * + * @param logger The logger to set + * @throws {ServiceConflictError} If a different logger has already been retrieved + */ + setLogger(logger: CrawleeLogger): void; + + /** + * Get a child logger with the given prefix. + * Equivalent to `getLogger().child({ prefix })`. + */ + getChildLog(prefix: string): CrawleeLogger; + getStorageManager(constructor: Constructor): StorageManager | undefined; setStorageManager(constructor: Constructor, storageManager: StorageManager): void; @@ -105,10 +127,15 @@ interface ServiceLocatorInterface { * // Crawler has its own isolated ServiceLocator instance * ``` */ +// Used as fallback in ServiceLocator methods that need to log before a logger is explicitly set, +// without implicitly locking the logger slot (which getLogger() would do). +const fallbackLog = new ApifyLogAdapter(log); + export class ServiceLocator implements ServiceLocatorInterface { private configuration?: Configuration; private eventManager?: EventManager; private storageClient?: StorageClient; + private logger?: CrawleeLogger; /** * Storage managers for Dataset, KeyValueStore, and RequestQueue. @@ -122,16 +149,25 @@ export class ServiceLocator implements ServiceLocatorInterface { * @param configuration Optional configuration instance to use * @param eventManager Optional event manager instance to use * @param storageClient Optional storage client instance to use + * @param logger Optional logger instance to use */ - constructor(configuration?: Configuration, eventManager?: EventManager, storageClient?: StorageClient) { + constructor( + configuration?: Configuration, + eventManager?: EventManager, + storageClient?: StorageClient, + logger?: CrawleeLogger, + ) { this.configuration = configuration; this.eventManager = eventManager; this.storageClient = storageClient; + this.logger = logger; } getConfiguration(): Configuration { if (!this.configuration) { - log.debug('No configuration set, implicitly creating and using default Configuration.'); + (this.logger ?? fallbackLog).debug( + 'No configuration set, implicitly creating and using default Configuration.', + ); this.configuration = new Configuration(); } return this.configuration; @@ -153,9 +189,11 @@ export class ServiceLocator implements ServiceLocatorInterface { getEventManager(): EventManager { if (!this.eventManager) { - log.debug('No event manager set, implicitly creating and using default LocalEventManager.'); + (this.logger ?? fallbackLog).debug( + 'No event manager set, implicitly creating and using default LocalEventManager.', + ); if (!this.configuration) { - log.warning( + (this.logger ?? fallbackLog).warning( 'Implicit creation of event manager will implicitly set configuration as side effect. ' + 'It is advised to explicitly first set the configuration instead.', ); @@ -181,9 +219,11 @@ export class ServiceLocator implements ServiceLocatorInterface { getStorageClient(): StorageClient { if (!this.storageClient) { - log.debug('No storage client set, implicitly creating and using default MemoryStorage.'); + (this.logger ?? fallbackLog).debug( + 'No storage client set, implicitly creating and using default MemoryStorage.', + ); if (!this.configuration) { - log.warning( + (this.logger ?? fallbackLog).warning( 'Implicit creation of storage client will implicitly set configuration as side effect. ' + 'It is advised to explicitly first set the configuration instead.', ); @@ -210,6 +250,29 @@ export class ServiceLocator implements ServiceLocatorInterface { this.storageClient = storageClient; } + getLogger(): CrawleeLogger { + if (!this.logger) { + this.logger = new ApifyLogAdapter(log); + } + return this.logger; + } + + setLogger(logger: CrawleeLogger): void { + if (this.logger === logger) { + return; + } + + if (this.logger) { + throw new ServiceConflictError('Logger', logger, this.logger); + } + + this.logger = logger; + } + + getChildLog(prefix: string): CrawleeLogger { + return this.getLogger().child({ prefix }); + } + getStorageManager(constructor: Constructor): StorageManager | undefined { return this.storageManagers.get(constructor); } @@ -244,6 +307,7 @@ export class ServiceLocator implements ServiceLocatorInterface { this.configuration = undefined; this.eventManager = undefined; this.storageClient = undefined; + this.logger = undefined; this.clearStorageManagerCache(); } } @@ -344,6 +408,18 @@ export const serviceLocator: ServiceLocatorInterface = { const currentServiceLocator = serviceLocatorStorage.getStore() ?? globalServiceLocator; currentServiceLocator.setStorageClient(storageClient); }, + getLogger(): CrawleeLogger { + const currentServiceLocator = serviceLocatorStorage.getStore() ?? globalServiceLocator; + return currentServiceLocator.getLogger(); + }, + setLogger(logger: CrawleeLogger): void { + const currentServiceLocator = serviceLocatorStorage.getStore() ?? globalServiceLocator; + currentServiceLocator.setLogger(logger); + }, + getChildLog(prefix: string): CrawleeLogger { + const currentServiceLocator = serviceLocatorStorage.getStore() ?? globalServiceLocator; + return currentServiceLocator.getChildLog(prefix); + }, getStorageManager(constructor: Constructor): StorageManager | undefined { const currentServiceLocator = serviceLocatorStorage.getStore() ?? globalServiceLocator; return currentServiceLocator.getStorageManager(constructor); diff --git a/packages/core/src/session_pool/session.ts b/packages/core/src/session_pool/session.ts index 95c5cd7378f2..c9a3b10ac24d 100644 --- a/packages/core/src/session_pool/session.ts +++ b/packages/core/src/session_pool/session.ts @@ -5,7 +5,6 @@ import ow from 'ow'; import type { Cookie } from 'tough-cookie'; import { CookieJar } from 'tough-cookie'; -import type { Log } from '@apify/log'; import { cryptoRandomObjectId } from '@apify/utilities'; import { @@ -14,7 +13,8 @@ import { getDefaultCookieExpirationDate, toughCookieToBrowserPoolCookie, } from '../cookie_utils.js'; -import { log as defaultLog } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; +import { serviceLocator } from '../service_locator.js'; import { EVENT_SESSION_RETIRED } from './events.js'; export interface SessionOptions { @@ -69,7 +69,7 @@ export interface SessionOptions { /** SessionPool instance. Session will emit the `sessionRetired` event on this instance. */ sessionPool?: import('./session_pool.js').SessionPool; - log?: Log; + log?: CrawleeLogger; errorScore?: number; cookieJar?: CookieJar; proxyInfo?: ProxyInfo; @@ -95,7 +95,7 @@ export class Session implements ISession { private _errorScore: number; private _proxyInfo?: ProxyInfo; private _cookieJar: CookieJar; - private log: Log; + private log: CrawleeLogger; get errorScore() { return this._errorScore; @@ -170,7 +170,7 @@ export class Session implements ISession { usageCount = 0, errorScore = 0, maxUsageCount = 50, - log = defaultLog, + log = serviceLocator.getLogger(), } = options; const { expiresAt = getDefaultCookieExpirationDate(maxAgeSecs) } = options; diff --git a/packages/core/src/session_pool/session_pool.ts b/packages/core/src/session_pool/session_pool.ts index 3051b0efe711..7f567a2f6828 100644 --- a/packages/core/src/session_pool/session_pool.ts +++ b/packages/core/src/session_pool/session_pool.ts @@ -4,12 +4,10 @@ import type { Dictionary } from '@crawlee/types'; import { AsyncQueue } from '@sapphire/async-queue'; import ow from 'ow'; -import type { Log } from '@apify/log'; - import type { PersistenceOptions } from '../crawlers/statistics.js'; import type { EventManager } from '../events/event_manager.js'; import { EventType } from '../events/event_manager.js'; -import { log as defaultLog } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import { serviceLocator } from '../service_locator.js'; import { KeyValueStore } from '../storages/key_value_store.js'; import { BLOCKED_STATUS_CODES, MAX_POOL_SIZE, PERSIST_STATE_KEY } from './consts.js'; @@ -61,7 +59,7 @@ export interface SessionPoolOptions { blockedStatusCodes?: number[]; /** @internal */ - log?: Log; + log?: CrawleeLogger; /** * Control how and when to persist the state of the session pool. @@ -135,7 +133,7 @@ export interface SessionPoolOptions { * @category Scaling */ export class SessionPool extends EventEmitter { - protected log: Log; + protected log: CrawleeLogger; protected maxPoolSize: number; protected createSessionFunction: CreateSession; protected keyValueStore!: KeyValueStore; @@ -179,7 +177,7 @@ export class SessionPool extends EventEmitter { createSessionFunction, sessionOptions = {}, blockedStatusCodes = BLOCKED_STATUS_CODES, - log = defaultLog, + log = serviceLocator.getLogger(), persistenceOptions = { enable: true, }, diff --git a/packages/core/src/storages/dataset.ts b/packages/core/src/storages/dataset.ts index 985a92b895aa..4645e815fe37 100644 --- a/packages/core/src/storages/dataset.ts +++ b/packages/core/src/storages/dataset.ts @@ -5,7 +5,7 @@ import ow from 'ow'; import { MAX_PAYLOAD_SIZE_BYTES } from '@apify/consts'; import { Configuration } from '../configuration.js'; -import { type Log, log } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import { serviceLocator } from '../service_locator.js'; import type { Awaitable } from '../typedefs.js'; import { checkStorageAccess } from './access_checking.js'; @@ -235,7 +235,7 @@ export class Dataset { name?: string; client: DatasetClient; readonly storageObject?: Record; - log: Log = log.child({ prefix: 'Dataset' }); + log: CrawleeLogger; /** * @internal @@ -248,6 +248,7 @@ export class Dataset { this.name = options.name; this.client = options.client.dataset(this.id) as DatasetClient; this.storageObject = options.storageObject; + this.log = serviceLocator.getLogger().child({ prefix: 'Dataset' }); } /** diff --git a/packages/core/src/storages/key_value_store.ts b/packages/core/src/storages/key_value_store.ts index cbe9a165d01b..fd5e4fd8a795 100644 --- a/packages/core/src/storages/key_value_store.ts +++ b/packages/core/src/storages/key_value_store.ts @@ -6,7 +6,6 @@ import JSON5 from 'json5'; import ow, { ArgumentError } from 'ow'; import { KEY_VALUE_STORE_KEY_REGEX } from '@apify/consts'; -import log from '@apify/log'; import { jsonStringifyExtended } from '@apify/utilities'; import { Configuration } from '../configuration.js'; @@ -287,7 +286,9 @@ export class KeyValueStore { this.setValue(key, value, { timeoutSecs, doNotRetryTimeouts: true, - }).catch((error) => log.warning(`Failed to persist the state value to ${key}`, { error })), + }).catch((error) => + serviceLocator.getLogger().warning(`Failed to persist the state value to ${key}`, { error }), + ), ); } diff --git a/packages/core/src/storages/request_list.ts b/packages/core/src/storages/request_list.ts index 73ad06a0fc56..7690f6ece0e2 100644 --- a/packages/core/src/storages/request_list.ts +++ b/packages/core/src/storages/request_list.ts @@ -4,7 +4,7 @@ import ow, { ArgumentError } from 'ow'; import type { Configuration } from '../configuration.js'; import { EventType } from '../events/event_manager.js'; -import { log } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import type { ProxyConfiguration } from '../proxy_configuration.js'; import { type InternalSource, Request, type RequestOptions, type Source } from '../request.js'; import { createDeserialize, serializeArray } from '../serialization.js'; @@ -305,7 +305,7 @@ export interface RequestListOptions { * @category Sources */ export class RequestList implements IRequestList { - private log = log.child({ prefix: 'RequestList' }); + private log: CrawleeLogger = serviceLocator.getLogger().child({ prefix: 'RequestList' }); /** * Array of all requests from all sources, in the order as they appeared in sources. diff --git a/packages/core/src/storages/request_manager_tandem.ts b/packages/core/src/storages/request_manager_tandem.ts index 0d1ad21ff32d..164bbba192ce 100644 --- a/packages/core/src/storages/request_manager_tandem.ts +++ b/packages/core/src/storages/request_manager_tandem.ts @@ -1,9 +1,8 @@ import type { Dictionary } from '@crawlee/types'; -import type { Log } from '@apify/log'; - -import { log } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import type { Request, Source } from '../request.js'; +import { serviceLocator } from '../service_locator.js'; import type { IRequestList } from './request_list.js'; import type { AddRequestsBatchedOptions, @@ -20,12 +19,12 @@ import type { * transfers them in batches to the RequestQueue. */ export class RequestManagerTandem implements IRequestManager { - private log: Log; + private log: CrawleeLogger; private requestList: IRequestList; private requestQueue: IRequestManager; constructor(requestList: IRequestList, requestQueue: IRequestManager) { - this.log = log.child({ prefix: 'RequestManagerTandem' }); + this.log = serviceLocator.getLogger().child({ prefix: 'RequestManagerTandem' }); this.requestList = requestList; this.requestQueue = requestQueue; } diff --git a/packages/core/src/storages/request_provider.ts b/packages/core/src/storages/request_provider.ts index fd47a02c424c..c2f940b2f088 100644 --- a/packages/core/src/storages/request_provider.ts +++ b/packages/core/src/storages/request_provider.ts @@ -23,13 +23,12 @@ import ow from 'ow'; import type { ReadonlyDeep } from 'type-fest'; import { ListDictionary, LruCache } from '@apify/datastructures'; -import type { Log } from '@apify/log'; import { cryptoRandomObjectId } from '@apify/utilities'; import { Configuration } from '../configuration.js'; import type { EventManager } from '../events/event_manager.js'; import { EventType } from '../events/event_manager.js'; -import { log } from '../log.js'; +import type { CrawleeLogger } from '../log.js'; import type { ProxyConfiguration } from '../proxy_configuration.js'; import type { InternalSource, RequestOptions, Source } from '../request.js'; import { Request } from '../request.js'; @@ -111,7 +110,7 @@ export abstract class RequestProvider implements IStorage, IRequestManager { client: RequestQueueClient; protected proxyConfiguration?: ProxyConfiguration; - log: Log; + log: CrawleeLogger; internalTimeoutMillis = 5 * 60_000; // defaults to 5 minutes, will be overridden by BasicCrawler requestLockSecs = 3 * 60; // defaults to 3 minutes, will be overridden by BasicCrawler @@ -156,7 +155,9 @@ export abstract class RequestProvider implements IStorage, IRequestManager { this.requestCache = new LruCache({ maxLength: options.requestCacheMaxSize }); this.recentlyHandledRequestsCache = new LruCache({ maxLength: options.recentlyHandledRequestsMaxSize }); - this.log = log.child({ prefix: `${options.logPrefix}(${this.id}, ${this.name ?? 'no-name'})` }); + this.log = serviceLocator + .getLogger() + .child({ prefix: `${options.logPrefix}(${this.id}, ${this.name ?? 'no-name'})` }); this.events.on(EventType.MIGRATING, async () => { this.queuePausedForMigration = true; diff --git a/packages/core/src/storages/sitemap_request_list.ts b/packages/core/src/storages/sitemap_request_list.ts index 1fcdbd3ed2bd..2fc61ded820d 100644 --- a/packages/core/src/storages/sitemap_request_list.ts +++ b/packages/core/src/storages/sitemap_request_list.ts @@ -6,11 +6,10 @@ import { minimatch } from 'minimatch'; import ow from 'ow'; import type { RequiredDeep } from 'type-fest'; -import defaultLog from '@apify/log'; - import type { GlobInput, RegExpInput, UrlPatternObject } from '../enqueue_links/shared.js'; import { constructGlobObjectsFromGlobs, constructRegExpObjectsFromRegExps } from '../enqueue_links/shared.js'; import { type EventManager, EventType } from '../events/event_manager.js'; +import type { CrawleeLogger } from '../log.js'; import { Request } from '../request.js'; import { serviceLocator } from '../service_locator.js'; import { KeyValueStore } from './key_value_store.js'; @@ -196,7 +195,7 @@ export class SitemapRequestList implements IRequestList { /** * Logger instance. */ - private log = defaultLog.child({ prefix: 'SitemapRequestList' }); + private log: CrawleeLogger; private urlExcludePatternObjects: UrlPatternObject[] = []; private urlPatternObjects: UrlPatternObject[] = []; @@ -230,6 +229,8 @@ export class SitemapRequestList implements IRequestList { const { globs, exclude, regexps } = options; + this.log = serviceLocator.getLogger().child({ prefix: 'SitemapRequestList' }); + if (exclude?.length) { for (const excl of exclude) { if (typeof excl === 'string' || 'glob' in excl) { diff --git a/packages/core/test/core/service_locator.test.ts b/packages/core/test/core/service_locator.test.ts index 8c4c6afb8d9a..b589af7439dd 100644 --- a/packages/core/test/core/service_locator.test.ts +++ b/packages/core/test/core/service_locator.test.ts @@ -1,6 +1,34 @@ -import { Configuration, LocalEventManager, ServiceConflictError, ServiceLocator, serviceLocator } from '@crawlee/core'; +import type { CrawleeLogger } from '@crawlee/core'; +import { + ApifyLogAdapter, + Configuration, + LocalEventManager, + ServiceConflictError, + ServiceLocator, + serviceLocator, +} from '@crawlee/core'; import { MemoryStorage } from '@crawlee/memory-storage'; +function makeMockLogger(overrides: Partial = {}): CrawleeLogger { + const logger: CrawleeLogger = { + getOptions: () => ({}), + setOptions: () => {}, + child: () => logger, + error: () => {}, + exception: () => {}, + softFail: () => {}, + warning: () => {}, + warningOnce: () => {}, + info: () => {}, + debug: () => {}, + perf: () => {}, + deprecated: () => {}, + internal: () => {}, + ...overrides, + }; + return logger; +} + // Reset global service locator before each test beforeEach(() => { serviceLocator.reset(); @@ -141,8 +169,59 @@ describe('ServiceLocator', () => { }); }); + describe('Logger', () => { + test('default logger returns an ApifyLogAdapter wrapping @apify/log', () => { + const defaultLogger = serviceLocator.getLogger(); + expect(defaultLogger).toBeInstanceOf(ApifyLogAdapter); + }); + + test('custom logger can be set', () => { + const customLogger = makeMockLogger(); + serviceLocator.setLogger(customLogger); + expect(serviceLocator.getLogger()).toBe(customLogger); + }); + + test('logger overwrite not possible', () => { + const firstLogger = makeMockLogger(); + serviceLocator.setLogger(firstLogger); + + const secondLogger = makeMockLogger(); + + expect(() => { + serviceLocator.setLogger(secondLogger); + }).toThrow(ServiceConflictError); + }); + + test('logger conflict', () => { + serviceLocator.getLogger(); + + const customLogger = makeMockLogger(); + + expect(() => { + serviceLocator.setLogger(customLogger); + }).toThrow(ServiceConflictError); + expect(() => { + serviceLocator.setLogger(customLogger); + }).toThrow(/Logger is already in use/); + }); + + test('reset clears the logger', () => { + const customLogger = makeMockLogger(); + serviceLocator.setLogger(customLogger); + expect(serviceLocator.getLogger()).toBe(customLogger); + + serviceLocator.reset(); + + // After reset, default ApifyLogAdapter should be returned + expect(serviceLocator.getLogger()).toBeInstanceOf(ApifyLogAdapter); + }); + }); + describe('Reset functionality', () => { test('reset clears all services', () => { + const customLogger = makeMockLogger(); + serviceLocator.setLogger(customLogger); + const customConfig = new Configuration({ headless: false }); const customEventManager = new LocalEventManager({ persistStateIntervalMillis: 1000, @@ -158,6 +237,7 @@ describe('ServiceLocator', () => { expect(serviceLocator.getConfiguration()).toBe(customConfig); expect(serviceLocator.getEventManager()).toBe(customEventManager); expect(serviceLocator.getStorageClient()).toBe(customStorageClient); + expect(serviceLocator.getLogger()).toBe(customLogger); // Reset serviceLocator.reset(); @@ -205,6 +285,47 @@ describe('ServiceLocator', () => { serviceLocator.setStorageClient(storageClient); }).not.toThrow(); }); + + test('setting same logger instance is allowed', () => { + const logger = makeMockLogger(); + serviceLocator.setLogger(logger); + serviceLocator.getLogger(); + + // Setting the same instance again should not throw + expect(() => { + serviceLocator.setLogger(logger); + }).not.toThrow(); + }); + }); + + describe('getChildLog', () => { + test('returns a child logger with the given prefix', () => { + const children: CrawleeLogger[] = []; + const mockLogger = makeMockLogger({ + child: (options) => { + const child = makeMockLogger({ getOptions: () => options }); + children.push(child); + return child; + }, + }); + serviceLocator.setLogger(mockLogger); + + const child = serviceLocator.getChildLog('Test Prefix'); + + expect(children).toHaveLength(1); + expect(child.getOptions()).toEqual({ prefix: 'Test Prefix' }); + }); + + test('delegates to the current service locator context', () => { + const crawlerLocator = new ServiceLocator(); + const mockLogger = makeMockLogger({ + child: (options) => makeMockLogger({ getOptions: () => options }), + }); + crawlerLocator.setLogger(mockLogger); + + const child = crawlerLocator.getChildLog('Crawler Module'); + expect(child.getOptions()).toEqual({ prefix: 'Crawler Module' }); + }); }); describe('Per-crawler ServiceLocator', () => { diff --git a/packages/core/test/log/base-crawlee-logger.test.ts b/packages/core/test/log/base-crawlee-logger.test.ts new file mode 100644 index 000000000000..fb30c100c2e9 --- /dev/null +++ b/packages/core/test/log/base-crawlee-logger.test.ts @@ -0,0 +1,173 @@ +import type { CrawleeLogger, CrawleeLoggerOptions } from '../../src/log.js'; +import { BaseCrawleeLogger, LogLevel } from '../../src/log.js'; + +/** Minimal concrete implementation for testing. */ +class TestLogger extends BaseCrawleeLogger { + logWithLevel(_level: number, _message: string, _data?: Record): void { + // Captured via vitest.spyOn in tests. + } + + protected createChild(options: Partial): CrawleeLogger { + return new TestLogger({ ...this.getOptions(), ...options }); + } +} + +function makeLogger(options: Partial = {}) { + const logger = new TestLogger(options); + const spy = vitest.spyOn(logger, 'logWithLevel'); + return { logger, spy }; +} + +describe('BaseCrawleeLogger', () => { + describe('getOptions / setOptions', () => { + test('returns options passed to constructor', () => { + const { logger } = makeLogger({ prefix: 'Test' }); + expect(logger.getOptions()).toMatchObject({ prefix: 'Test' }); + }); + + test('setOptions overwrites prefix', () => { + const { logger } = makeLogger({ prefix: 'Test' }); + logger.setOptions({ prefix: 'Updated' }); + expect(logger.getOptions().prefix).toBe('Updated'); + }); + }); + + describe('error()', () => { + test('calls logWithLevel with ERROR level and message', () => { + const { logger, spy } = makeLogger(); + logger.error('something broke'); + expect(spy).toHaveBeenCalledWith(LogLevel.ERROR, 'something broke', undefined); + }); + + test('passes data through', () => { + const { logger, spy } = makeLogger(); + logger.error('oops', { code: 42 }); + expect(spy).toHaveBeenCalledWith(LogLevel.ERROR, 'oops', { code: 42 }); + }); + }); + + describe('exception()', () => { + test('logs at ERROR level with combined message', () => { + const { logger, spy } = makeLogger(); + const err = new Error('disk full'); + logger.exception(err, 'Save failed'); + expect(spy).toHaveBeenCalledWith( + LogLevel.ERROR, + 'Save failed: disk full', + expect.objectContaining({ stack: err.stack }), + ); + }); + + test('merges extra data alongside stack', () => { + const { logger, spy } = makeLogger(); + const err = new Error('timeout'); + logger.exception(err, 'Request failed', { url: 'https://example.com' }); + expect(spy).toHaveBeenCalledWith( + LogLevel.ERROR, + 'Request failed: timeout', + expect.objectContaining({ url: 'https://example.com', stack: err.stack }), + ); + }); + }); + + describe('softFail()', () => { + test('calls logWithLevel with SOFT_FAIL level', () => { + const { logger, spy } = makeLogger(); + logger.softFail('non-critical'); + expect(spy).toHaveBeenCalledWith(LogLevel.SOFT_FAIL, 'non-critical', undefined); + }); + }); + + describe('warningOnce()', () => { + test('logs the first occurrence', () => { + const { logger, spy } = makeLogger(); + logger.warningOnce('only once'); + expect(spy).toHaveBeenCalledOnce(); + }); + + test('suppresses subsequent identical messages', () => { + const { logger, spy } = makeLogger(); + logger.warningOnce('only once'); + logger.warningOnce('only once'); + logger.warningOnce('only once'); + expect(spy).toHaveBeenCalledOnce(); + }); + + test('treats different messages independently', () => { + const { logger, spy } = makeLogger(); + logger.warningOnce('message A'); + logger.warningOnce('message B'); + expect(spy).toHaveBeenCalledTimes(2); + }); + }); + + describe('perf()', () => { + test('prepends [PERF] to the message', () => { + const { logger, spy } = makeLogger(); + logger.perf('render took 20ms'); + expect(spy).toHaveBeenCalledWith(LogLevel.PERF, '[PERF] render took 20ms', undefined); + }); + }); + + describe('deprecated()', () => { + test('logs with [DEPRECATED] prefix', () => { + const { logger, spy } = makeLogger(); + logger.deprecated('use newFn() instead'); + expect(spy).toHaveBeenCalledWith(LogLevel.WARNING, '[DEPRECATED] use newFn() instead', undefined); + }); + + test('only logs once per message', () => { + const { logger, spy } = makeLogger(); + logger.deprecated('use newFn() instead'); + logger.deprecated('use newFn() instead'); + expect(spy).toHaveBeenCalledOnce(); + }); + + test('different deprecated messages are each logged once', () => { + const { logger, spy } = makeLogger(); + logger.deprecated('old api A'); + logger.deprecated('old api B'); + expect(spy).toHaveBeenCalledTimes(2); + }); + }); + + describe('logWithLevel()', () => { + test('dispatches at the given level', () => { + const { logger, spy } = makeLogger(); + logger.logWithLevel(LogLevel.WARNING, 'log warning'); + expect(spy).toHaveBeenCalledWith(LogLevel.WARNING, 'log warning'); + }); + + test('passes data through', () => { + const { logger, spy } = makeLogger(); + logger.logWithLevel(LogLevel.ERROR, 'log error', { key: 'val' }); + expect(spy).toHaveBeenCalledWith(LogLevel.ERROR, 'log error', { key: 'val' }); + }); + }); + + describe('child()', () => { + test('returns a new logger instance', () => { + const { logger } = makeLogger(); + const child = logger.child({ prefix: 'Child' }); + expect(child).not.toBe(logger); + }); + + test('child inherits parent options', () => { + const { logger } = makeLogger({ prefix: 'Parent' }); + const child = logger.child({ prefix: 'Child' }) as TestLogger; + expect(child.getOptions()).toMatchObject({ prefix: 'Child' }); + }); + + test('child has independent warningOnce deduplication', () => { + const { logger } = makeLogger(); + const child = logger.child({ prefix: 'Child' }) as TestLogger; + const childSpy = vitest.spyOn(child as TestLogger, 'logWithLevel'); + + logger.warningOnce('shared warning'); + + // Child hasn't logged it yet — should log independently + child.warningOnce('shared warning'); + expect(childSpy).toHaveBeenCalledOnce(); + }); + }); +}); diff --git a/packages/jsdom-crawler/src/internals/jsdom-crawler.ts b/packages/jsdom-crawler/src/internals/jsdom-crawler.ts index 2d008ba59934..b229101027cb 100644 --- a/packages/jsdom-crawler/src/internals/jsdom-crawler.ts +++ b/packages/jsdom-crawler/src/internals/jsdom-crawler.ts @@ -242,7 +242,7 @@ export class JSDOMCrawler< return this.virtualConsole; } - private readonly jsdomErrorHandler = (error: Error) => this.log.debug('JSDOM error from console', error); + private readonly jsdomErrorHandler = (error: Error) => this.log.debug('JSDOM error from console', { error }); private async parseContent(crawlingContext: InternalHttpCrawlingContext) { const isXml = crawlingContext.contentType.type.includes('xml'); diff --git a/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts b/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts index 549920703d08..5e5e028bcfd4 100644 --- a/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts +++ b/packages/playwright-crawler/src/internals/adaptive-playwright-crawler.ts @@ -7,6 +7,7 @@ import type { CheerioCrawlingContext } from '@crawlee/cheerio'; import { CheerioCrawler } from '@crawlee/cheerio'; import type { ContextPipeline, + CrawleeLogger, CrawlingContext, EnqueueLinksOptions, GetUserDataFromRequest, @@ -33,7 +34,6 @@ import type { AnyNode } from 'domhandler'; import type { Page } from 'playwright'; import type { SetRequired } from 'type-fest'; -import type { Log } from '@apify/log'; import { addTimeoutToPromise } from '@apify/timeout'; import type { PlaywrightCrawlingContext, PlaywrightGotoOptions } from './playwright-crawler.js'; @@ -232,7 +232,7 @@ const proxyLogMethods = [ 'deprecated', ] as const; -type LogProxyCall = [log: Log, method: (typeof proxyLogMethods)[number], ...args: unknown[]]; +type LogProxyCall = [log: CrawleeLogger, method: (typeof proxyLogMethods)[number], ...args: unknown[]]; /** * An extension of {@apilink PlaywrightCrawler} that uses a more limited request handler interface so that it is able to switch to HTTP-only crawling when it detects it may be possible. @@ -725,9 +725,9 @@ export class AdaptivePlaywrightCrawler< return await this.enqueueLinksWithCrawlDepth({ ...options, baseUrl }, request, mockRequestQueue); } - private createLogProxy(log: Log, logs: LogProxyCall[]) { + private createLogProxy(log: CrawleeLogger, logs: LogProxyCall[]) { return new Proxy(log, { - get(target: Log, propertyName: (typeof proxyLogMethods)[number], receiver: any) { + get(target: CrawleeLogger, propertyName: (typeof proxyLogMethods)[number], receiver: any) { if (proxyLogMethods.includes(propertyName)) { return (...args: unknown[]) => { logs.push([target, propertyName, ...args]); diff --git a/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts b/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts index 6ec69bfecaab..aad88a8b4d49 100644 --- a/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts +++ b/packages/playwright-crawler/src/internals/enqueue-links/click-elements.ts @@ -18,15 +18,14 @@ import { createRequestOptions, filterRequestOptionsByPatterns, Request as CrawleeRequest, + serviceLocator, } from '@crawlee/browser'; import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types'; import ow from 'ow'; import type { Frame, Page, Request, Route } from 'playwright'; -import log_ from '@apify/log'; - const STARTING_Z_INDEX = 2147400000; -const log = log_.child({ prefix: 'Playwright Click Elements' }); +const getLog = () => serviceLocator.getChildLog('Playwright Click Elements'); type ClickOptions = Parameters[1]; @@ -295,7 +294,7 @@ export async function enqueueLinksByClickingElements( } if (pseudoUrls?.length) { - log.deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); + serviceLocator.getLogger().deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); urlPatternObjects.push(...constructRegExpObjectsFromPseudoUrls(pseudoUrls)); } @@ -431,7 +430,9 @@ function createTargetCreatedHandler(requests: Set): (popup: Page) => Pro try { await popup.close(); } catch (err) { - log.debug('enqueueLinksByClickingElements: Could not close spawned page.', { error: (err as Error).stack }); + getLog().debug('enqueueLinksByClickingElements: Could not close spawned page.', { + error: (err as Error).stack, + }); } }; } @@ -517,7 +518,7 @@ function updateElementCssToEnableMouseClick(el: Element, zIndex: number): void { */ export async function clickElements(page: Page, selector: string, clickOptions?: ClickOptions): Promise { const elementHandles = await page.$$(selector); - log.debug(`enqueueLinksByClickingElements: There are ${elementHandles.length} elements to click.`); + getLog().debug(`enqueueLinksByClickingElements: There are ${elementHandles.length} elements to click.`); let clickedElementsCount = 0; let zIndex = STARTING_Z_INDEX; let shouldLogWarning = true; @@ -529,17 +530,17 @@ export async function clickElements(page: Page, selector: string, clickOptions?: } catch (err) { const e = err as Error; if (shouldLogWarning && e.stack!.includes('is detached from document')) { - log.warning( + getLog().warning( `An element with selector ${selector} that you're trying to click has been removed from the page. ` + 'This was probably caused by an earlier click which triggered some JavaScript on the page that caused it to change. ' + 'If you\'re trying to enqueue pagination links, we suggest using the "next" button, if available and going one by one.', ); shouldLogWarning = false; } - log.debug('enqueueLinksByClickingElements: Click failed.', { stack: e.stack }); + getLog().debug('enqueueLinksByClickingElements: Click failed.', { stack: e.stack }); } } - log.debug( + getLog().debug( `enqueueLinksByClickingElements: Successfully clicked ${clickedElementsCount} elements out of ${elementHandles.length}`, ); } @@ -578,7 +579,7 @@ async function waitForPageIdle({ } function maxTimeoutHandler() { - log.debug( + getLog().debug( `enqueueLinksByClickingElements: Page still showed activity after ${maxWaitForPageIdleMillis}ms. ` + 'This is probably due to the website itself dispatching requests, but some links may also have been missed.', ); @@ -614,7 +615,7 @@ async function restoreHistoryNavigationAndSaveCapturedUrls(page: Page, requests: const url = new URL(stateUrl, page.url()).href; requests.add(JSON.stringify({ url })); } catch (err) { - log.debug('enqueueLinksByClickingElements: Failed to ', { error: (err as Error).stack }); + getLog().debug('enqueueLinksByClickingElements: Failed to ', { error: (err as Error).stack }); } }); } diff --git a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts index 64a91d1a48a1..9255e516cb54 100644 --- a/packages/playwright-crawler/src/internals/utils/playwright-utils.ts +++ b/packages/playwright-crawler/src/internals/utils/playwright-utils.ts @@ -22,7 +22,15 @@ import { readFile } from 'node:fs/promises'; import { createRequire } from 'node:module'; import vm from 'node:vm'; -import { Configuration, KeyValueStore, type Request, type Session, SessionError, validators } from '@crawlee/browser'; +import { + Configuration, + KeyValueStore, + type Request, + serviceLocator, + type Session, + SessionError, + validators, +} from '@crawlee/browser'; import type { BatchAddRequestsResult } from '@crawlee/types'; import { type CheerioRoot, type Dictionary, expandShadowRoots, sleep } from '@crawlee/utils'; import * as cheerio from 'cheerio'; @@ -30,13 +38,12 @@ import ow from 'ow'; import type { Page, Response, Route } from 'playwright'; import { LruCache } from '@apify/datastructures'; -import log_ from '@apify/log'; import type { EnqueueLinksByClickingElementsOptions } from '../enqueue-links/click-elements.js'; import { enqueueLinksByClickingElements } from '../enqueue-links/click-elements.js'; import { RenderingTypePredictor } from './rendering-type-prediction.js'; -const log = log_.child({ prefix: 'Playwright Utils' }); +const getLog = () => serviceLocator.getChildLog('Playwright Utils'); const require = createRequire(import.meta.url); const jqueryPath = require.resolve('jquery'); @@ -105,7 +112,7 @@ export async function injectFile(page: Page, filePath: string, options: InjectFi page.on('framenavigated', async () => page .evaluate(contents) - .catch((error) => log.warning('An error occurred during the script injection!', { error })), + .catch((error) => getLog().warning('An error occurred during the script injection!', { error })), ); } @@ -200,7 +207,7 @@ export async function gotoExtended( if (method !== 'GET' || payload || !isEmpty(headers)) { // This is not deprecated, we use it to log only once. - log.deprecated( + getLog().deprecated( 'Using other request methods than GET, rewriting headers and adding payloads has a high impact on performance ' + 'in recent versions of Playwright. Use only when necessary.', ); @@ -221,7 +228,7 @@ export async function gotoExtended( if (!isEmpty(headers)) overrides.headers = headers; await route.continue(overrides); } catch (error) { - log.debug('Error inside request interceptor', { error }); + getLog().debug('Error inside request interceptor', { error }); } return undefined; @@ -300,7 +307,7 @@ export async function blockRequests(page: Page, options: BlockRequestsOptions = await client.send('Network.enable'); await client.send('Network.setBlockedURLs', { urls: patternsToBlock }); } catch { - log.warning('blockRequests() helper is incompatible with non-Chromium browsers.'); + getLog().warning('blockRequests() helper is incompatible with non-Chromium browsers.'); } } @@ -344,7 +351,7 @@ export function compileScript(scriptString: string, context: Dictionary = Object try { func = vm.runInNewContext(funcString, context); // "Secure" the context by removing prototypes, unless custom context is provided. } catch (err) { - log.exception(err as Error, 'Cannot compile script!'); + getLog().exception(err as Error, 'Cannot compile script!'); throw err; } @@ -634,7 +641,7 @@ export async function parseWithCheerio( }, contents); } } catch (error) { - log.warning(`Failed to extract iframe content: ${error}`); + getLog().warning(`Failed to extract iframe content: ${error}`); } }), ); @@ -655,7 +662,7 @@ async function getIdcacPlaywright() { try { idcacPlaywright = await import('idcac-playwright'); } catch (error: any) { - log.warning(`Failed to import 'idcac-playwright'. + getLog().warning(`Failed to import 'idcac-playwright'. We recently made idcac-playwright an optional dependency due to licensing issues. To use this feature, please install it manually by running @@ -762,7 +769,7 @@ async function handleCloudflareChallenge( } const logLevel = options.verbose ? 'info' : 'debug'; - log[logLevel]( + getLog()[logLevel]( `Detected Cloudflare challenge at ${url}, trying to solve it. This can take up to ${10 + (options.sleepSecs ?? 10)} seconds.`, ); @@ -802,7 +809,7 @@ async function handleCloudflareChallenge( const xRandomized = x + randomOffset(10); const yRandomized = y + randomOffset(10); - log[logLevel](`Trying to click on the Cloudflare checkbox at ${url}`, { x: xRandomized, y: yRandomized }); + getLog()[logLevel](`Trying to click on the Cloudflare checkbox at ${url}`, { x: xRandomized, y: yRandomized }); await page.mouse.click(xRandomized, yRandomized); // sometimes the checkbox is lower (could be caused by a lag when rendering the logo) diff --git a/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts b/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts index fd1e9d56fe91..44cb0efae004 100644 --- a/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts +++ b/packages/puppeteer-crawler/src/internals/enqueue-links/click-elements.ts @@ -18,17 +18,16 @@ import { createRequestOptions, filterRequestOptionsByPatterns, Request, + serviceLocator, } from '@crawlee/browser'; import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types'; import ow from 'ow'; import type { ClickOptions, Frame, HTTPRequest as PuppeteerRequest, Page, Target } from 'puppeteer'; -import log_ from '@apify/log'; - import { addInterceptRequestHandler, removeInterceptRequestHandler } from '../utils/puppeteer_request_interception.js'; const STARTING_Z_INDEX = 2147400000; -const log = log_.child({ prefix: 'Puppeteer Click Elements' }); +const getLog = () => serviceLocator.getChildLog('Puppeteer Click Elements'); export interface EnqueueLinksByClickingElementsOptions { /** @@ -295,7 +294,7 @@ export async function enqueueLinksByClickingElements( } if (pseudoUrls?.length) { - log.deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); + getLog().deprecated('`pseudoUrls` option is deprecated, use `globs` or `regexps` instead'); urlPatternObjects.push(...constructRegExpObjectsFromPseudoUrls(pseudoUrls)); } @@ -439,7 +438,9 @@ function createTargetCreatedHandler(page: Page, requests: Set): (target: const createdPage = await target.page(); await createdPage!.close(); } catch (err) { - log.debug('enqueueLinksByClickingElements: Could not close spawned page.', { error: (err as Error).stack }); + getLog().debug('enqueueLinksByClickingElements: Could not close spawned page.', { + error: (err as Error).stack, + }); } }; } @@ -509,7 +510,7 @@ async function preventHistoryNavigation(page: Page): Promise { */ export async function clickElements(page: Page, selector: string, clickOptions?: ClickOptions): Promise { const elementHandles = await page.$$(selector); - log.debug(`enqueueLinksByClickingElements: There are ${elementHandles.length} elements to click.`); + getLog().debug(`enqueueLinksByClickingElements: There are ${elementHandles.length} elements to click.`); let clickedElementsCount = 0; let zIndex = STARTING_Z_INDEX; let shouldLogWarning = true; @@ -521,17 +522,17 @@ export async function clickElements(page: Page, selector: string, clickOptions?: } catch (err) { const e = err as Error; if (shouldLogWarning && e.stack!.includes('is detached from document')) { - log.warning( + getLog().warning( `An element with selector ${selector} that you're trying to click has been removed from the page. ` + 'This was probably caused by an earlier click which triggered some JavaScript on the page that caused it to change. ' + 'If you\'re trying to enqueue pagination links, we suggest using the "next" button, if available and going one by one.', ); shouldLogWarning = false; } - log.debug('enqueueLinksByClickingElements: Click failed.', { stack: e.stack }); + getLog().debug('enqueueLinksByClickingElements: Click failed.', { stack: e.stack }); } } - log.debug( + getLog().debug( `enqueueLinksByClickingElements: Successfully clicked ${clickedElementsCount} elements out of ${elementHandles.length}`, ); } @@ -590,7 +591,7 @@ async function waitForPageIdle({ } function maxTimeoutHandler() { - log.debug( + getLog().debug( `enqueueLinksByClickingElements: Page still showed activity after ${maxWaitForPageIdleMillis}ms. ` + 'This is probably due to the website itself dispatching requests, but some links may also have been missed.', ); @@ -628,7 +629,7 @@ async function restoreHistoryNavigationAndSaveCapturedUrls(page: Page, requests: const url = new URL(stateUrl, page.url()).href; requests.add(JSON.stringify({ url })); } catch (err) { - log.debug('enqueueLinksByClickingElements: Failed to ', { error: (err as Error).stack }); + getLog().debug('enqueueLinksByClickingElements: Failed to ', { error: (err as Error).stack }); } }); } diff --git a/packages/puppeteer-crawler/src/internals/utils/puppeteer_request_interception.ts b/packages/puppeteer-crawler/src/internals/utils/puppeteer_request_interception.ts index d5b1f44a38dc..e5d82a3b3881 100644 --- a/packages/puppeteer-crawler/src/internals/utils/puppeteer_request_interception.ts +++ b/packages/puppeteer-crawler/src/internals/utils/puppeteer_request_interception.ts @@ -1,11 +1,10 @@ import { EventEmitter } from 'node:events'; +import { serviceLocator } from '@crawlee/browser'; import type { Dictionary } from '@crawlee/utils'; import ow from 'ow'; import type { HTTPRequest, HTTPRequest as PuppeteerRequest, Page } from 'puppeteer'; -import log from '@apify/log'; - export type InterceptHandler = (request: PuppeteerRequest) => unknown; // We use weak maps here so that the content gets discarded after page gets closed. @@ -222,7 +221,7 @@ export async function removeInterceptRequestHandler(page: Page, handler: Interce await disableRequestInterception(page); interceptedRequestsInProgress.removeListener('delete', onDelete); } catch (error) { - log.debug('Error while disabling request interception', { error }); + serviceLocator.getLogger().debug('Error while disabling request interception', { error }); } } }; diff --git a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts index a2d8b7ee855a..b3e07b2fdcc0 100644 --- a/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts +++ b/packages/puppeteer-crawler/src/internals/utils/puppeteer_utils.ts @@ -23,7 +23,7 @@ import { createRequire } from 'node:module'; import vm from 'node:vm'; import type { Request } from '@crawlee/browser'; -import { Configuration, KeyValueStore, validators } from '@crawlee/browser'; +import { Configuration, KeyValueStore, serviceLocator, validators } from '@crawlee/browser'; import type { BatchAddRequestsResult, Dictionary } from '@crawlee/types'; import { type CheerioRoot, expandShadowRoots, sleep } from '@crawlee/utils'; import * as cheerio from 'cheerio'; @@ -32,7 +32,6 @@ import ow from 'ow'; import type { HTTPRequest as PuppeteerRequest, HTTPResponse, Page, ResponseForRequest } from 'puppeteer'; import { LruCache } from '@apify/datastructures'; -import log_ from '@apify/log'; import type { EnqueueLinksByClickingElementsOptions } from '../enqueue-links/click-elements.js'; import { enqueueLinksByClickingElements } from '../enqueue-links/click-elements.js'; @@ -45,7 +44,7 @@ const jqueryPath = require.resolve('jquery'); const MAX_INJECT_FILE_CACHE_SIZE = 10; const DEFAULT_BLOCK_REQUEST_URL_PATTERNS = ['.css', '.jpg', '.jpeg', '.png', '.svg', '.gif', '.woff', '.pdf', '.zip']; -const log = log_.child({ prefix: 'Puppeteer Utils' }); +const getLog = () => serviceLocator.getChildLog('Puppeteer Utils'); export interface DirectNavigationOptions { /** @@ -139,7 +138,7 @@ export async function injectFile(page: Page, filePath: string, options: InjectFi page.on('framenavigated', async () => page .evaluate(contents) - .catch((error) => log.warning('An error occurred during the script injection!', { error })), + .catch((error) => getLog().warning('An error occurred during the script injection!', { error })), ); } @@ -224,7 +223,7 @@ export async function parseWithCheerio( }, contents); } } catch (error) { - log.warning(`Failed to extract iframe content: ${error}`); + getLog().warning(`Failed to extract iframe content: ${error}`); } }), ); @@ -333,10 +332,12 @@ export async function sendCDPCommand( * @deprecated */ export const blockResources = async (page: Page, resourceTypes = ['stylesheet', 'font', 'image', 'media']) => { - log.deprecated( - 'utils.puppeteer.blockResources() has a high impact on performance in recent versions of Puppeteer. ' + - 'Until this resolves, please use utils.puppeteer.blockRequests()', - ); + serviceLocator + .getLogger() + .deprecated( + 'utils.puppeteer.blockResources() has a high impact on performance in recent versions of Puppeteer. ' + + 'Until this resolves, please use utils.puppeteer.blockRequests()', + ); await addInterceptRequestHandler(page, async (request) => { const type = request.resourceType(); if (resourceTypes.includes(type)) await request.abort(); @@ -369,10 +370,12 @@ export async function cacheResponses( ow(cache, ow.object); ow(responseUrlRules, ow.array.ofType(ow.any(ow.string, ow.regExp))); - log.deprecated( - 'utils.puppeteer.cacheResponses() has a high impact on performance ' + - "in recent versions of Puppeteer so it's use is discouraged until this issue resolves.", - ); + serviceLocator + .getLogger() + .deprecated( + 'utils.puppeteer.cacheResponses() has a high impact on performance ' + + "in recent versions of Puppeteer so it's use is discouraged until this issue resolves.", + ); await addInterceptRequestHandler(page, async (request) => { const url = request.url(); @@ -445,7 +448,7 @@ export function compileScript(scriptString: string, context: Dictionary = Object try { func = vm.runInNewContext(funcString, context); // "Secure" the context by removing prototypes, unless custom context is provided. } catch (err) { - log.exception(err as Error, 'Cannot compile script!'); + getLog().exception(err as Error, 'Cannot compile script!'); throw err; } @@ -494,10 +497,12 @@ export async function gotoExtended( if (method !== 'GET' || payload || !isEmpty(headers)) { // This is not deprecated, we use it to log only once. - log.deprecated( - 'Using other request methods than GET, rewriting headers and adding payloads has a high impact on performance ' + - 'in recent versions of Puppeteer. Use only when necessary.', - ); + serviceLocator + .getLogger() + .deprecated( + 'Using other request methods than GET, rewriting headers and adding payloads has a high impact on performance ' + + 'in recent versions of Puppeteer. Use only when necessary.', + ); let wasCalled = false; const interceptRequestHandler = async (interceptedRequest: PuppeteerRequest) => { // We want to ensure that this won't get executed again in a case that there is a subsequent request @@ -786,7 +791,7 @@ async function getIdcacPlaywright() { try { idcacPlaywright = await import('idcac-playwright'); } catch (error: any) { - log.warning(`Failed to import 'idcac-playwright'. + getLog().warning(`Failed to import 'idcac-playwright'. We recently made idcac-playwright an optional dependency due to licensing issues. To use this feature, please install it manually by running diff --git a/test/core/crawlers/basic_crawler.test.ts b/test/core/crawlers/basic_crawler.test.ts index 865523a25e78..d5a8c3fd05f3 100644 --- a/test/core/crawlers/basic_crawler.test.ts +++ b/test/core/crawlers/basic_crawler.test.ts @@ -436,10 +436,10 @@ describe('BasicCrawler', () => { }); } - const loggerSpy = vitest.spyOn(log, 'warning'); - const [crawler1, crawler2] = [createCrawler(), createCrawler()]; + const loggerSpy = vitest.spyOn(serviceLocator.getLogger(), 'warningOnce'); + await crawler1.run([`http://${HOSTNAME}:${port}/`]); await crawler2.run([`http://${HOSTNAME}:${port}/?page=2`]); @@ -454,6 +454,44 @@ describe('BasicCrawler', () => { expect(loggerSpy).toBeCalledWith(expect.stringContaining('Multiple crawler instances are calling useState()')); }); + test('shared-state warning is emitted only once regardless of crawler count', async () => { + // This test guards against a regression where per-instance loggers were used + // for a class-level (static) concern: each crawler would emit the warning + // independently, producing N warnings for N crawlers instead of just one. + + // Clear the global logger's dedup state so this test is isolated from others. + (log as any).warningsOnceLogged.clear(); + + // Spy on the underlying warning dispatch to count actual emissions. + const warningSpy = vitest.spyOn(serviceLocator.getLogger(), 'warning'); + const crawlers = [ + new BasicCrawler({ + requestHandler: async ({ useState }) => { + await useState({ count: 0 }); + }, + }), + new BasicCrawler({ + requestHandler: async ({ useState }) => { + await useState({ count: 0 }); + }, + }), + new BasicCrawler({ + requestHandler: async ({ useState }) => { + await useState({ count: 0 }); + }, + }), + ]; + + await crawlers[0].run([`http://${HOSTNAME}:${port}/`]); + await crawlers[1].run([`http://${HOSTNAME}:${port}/?page=2`]); + await crawlers[2].run([`http://${HOSTNAME}:${port}/?page=3`]); + + const sharedStateWarnings = warningSpy.mock.calls.filter( + ([msg]) => typeof msg === 'string' && msg.includes('Multiple crawler instances are calling useState()'), + ); + expect(sharedStateWarnings).toHaveLength(1); + }); + test('crawlers with explicit id have isolated state', async () => { function createCrawler(id: string) { return new BasicCrawler({ diff --git a/test/core/crawlers/cheerio_crawler.test.ts b/test/core/crawlers/cheerio_crawler.test.ts index 30bb5ec03b76..5181fa7e7fa8 100644 --- a/test/core/crawlers/cheerio_crawler.test.ts +++ b/test/core/crawlers/cheerio_crawler.test.ts @@ -11,6 +11,7 @@ import { RequestList, Session, } from '@crawlee/cheerio'; +import { BaseCrawleeLogger } from '@crawlee/core'; import { ImpitHttpClient } from '@crawlee/impit-client'; import type { ProxyInfo } from '@crawlee/types'; import type { Dictionary } from '@crawlee/utils'; @@ -20,7 +21,7 @@ import iconv from 'iconv-lite'; import { responseSamples, runExampleComServer } from 'test/shared/_helper.js'; import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator.js'; -import log, { Log } from '@apify/log'; +import log from '@apify/log'; let server: Server; let port: number; @@ -1111,7 +1112,7 @@ describe('CheerioCrawler', () => { }); test('mergeCookies()', async () => { - const warningSpy = vitest.spyOn(Log.prototype, 'warningOnce'); + const warningSpy = vitest.spyOn(BaseCrawleeLogger.prototype, 'warningOnce'); const cookie1 = mergeCookies('https://example.com', [ 'foo=bar1; other=cookie1 ; coo=kie', 'foo=bar2; baz=123', diff --git a/test/core/session_pool/session_pool.test.ts b/test/core/session_pool/session_pool.test.ts index 1d6bc31a24ba..a0d5fb9f6634 100644 --- a/test/core/session_pool/session_pool.test.ts +++ b/test/core/session_pool/session_pool.test.ts @@ -1,9 +1,7 @@ -import { EventType, KeyValueStore, serviceLocator, Session, SessionPool } from '@crawlee/core'; +import { BaseCrawleeLogger, EventType, KeyValueStore, serviceLocator, Session, SessionPool } from '@crawlee/core'; import { entries } from '@crawlee/utils'; import { MemoryStorageEmulator } from 'test/shared/MemoryStorageEmulator.js'; -import { Log } from '@apify/log'; - describe('SessionPool - testing session pool', () => { let sessionPool: SessionPool; const localStorageEmulator = new MemoryStorageEmulator(); @@ -58,7 +56,7 @@ describe('SessionPool - testing session pool', () => { }); // log is appended to sessionOptions after sessionPool instantiation // @ts-expect-error private symbol - expect(sessionPool.sessionOptions).toEqual({ ...opts.sessionOptions, log: expect.any(Log) }); + expect(sessionPool.sessionOptions).toEqual({ ...opts.sessionOptions, log: expect.any(BaseCrawleeLogger) }); }); test('should work using SessionPool.open', async () => { @@ -85,7 +83,7 @@ describe('SessionPool - testing session pool', () => { }); // log is appended to sessionOptions after sessionPool instantiation // @ts-expect-error private symbol - expect(sessionPool.sessionOptions).toEqual({ ...opts.sessionOptions, log: expect.any(Log) }); + expect(sessionPool.sessionOptions).toEqual({ ...opts.sessionOptions, log: expect.any(BaseCrawleeLogger) }); }); describe('should retrieve session', () => {