Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
4ad3540
decouples logger from @apify/log
l2ysho Feb 5, 2026
4c92be2
update .gitignore
l2ysho Feb 5, 2026
90fbb0c
feat: add connectOptions and connectOverCDPOptions to PlaywrightCrawler
l2ysho Feb 17, 2026
21a7f13
Merge branch 'v4' into 3068-decouple-log-configuration-from-apifylog
l2ysho Feb 18, 2026
7711a16
refactor to make build work
l2ysho Feb 18, 2026
3e75bdc
lint fix
l2ysho Feb 18, 2026
f2c18c9
add abstract class to simplify implementation
l2ysho Feb 19, 2026
26d101e
Fix docs
l2ysho Feb 19, 2026
5750725
fix log spy in test
l2ysho Feb 19, 2026
e10bff2
Revert "feat: add connectOptions and connectOverCDPOptions to Playwri…
l2ysho Feb 19, 2026
f1f1fac
Fix Request logger
l2ysho Feb 19, 2026
1d78eda
cleanup + refactor
l2ysho Feb 19, 2026
d8cb997
refactor
l2ysho Feb 19, 2026
ab44350
cleanup
l2ysho Feb 19, 2026
3ae62b2
fix request log
l2ysho Feb 19, 2026
d259010
fix minimal level
l2ysho Feb 19, 2026
62a2934
fix request tandem
l2ysho Feb 19, 2026
8175e7b
add test for BaseCrawleeLogger
l2ysho Feb 19, 2026
d785449
fix test
l2ysho Feb 19, 2026
b3875f6
lint fix
l2ysho Feb 20, 2026
fbaf3a5
fix warningOnece
l2ysho Feb 20, 2026
d03dcf3
fix tests
l2ysho Feb 20, 2026
bbf293f
revert gitignore
l2ysho Feb 21, 2026
78380e8
Merge branch 'v4' into 3068-decouple-log-configuration-from-apifylog
l2ysho Feb 23, 2026
84b256f
after merge refactor
l2ysho Feb 23, 2026
439e67b
fix leftover imports
l2ysho Feb 24, 2026
37788ad
lint:fix
l2ysho Feb 24, 2026
67616a3
update snapshotter
l2ysho Feb 24, 2026
3c4fb54
lint:fix
l2ysho Feb 24, 2026
96eca15
lint:fix
l2ysho Feb 24, 2026
2b8974e
update recoverable_state
l2ysho Feb 24, 2026
cbabbae
refactor test
l2ysho Feb 24, 2026
fb555ee
remove dead import
l2ysho Feb 24, 2026
fd22e54
small fix in tests
l2ysho Feb 24, 2026
167e14a
fix tests
l2ysho Feb 24, 2026
45f03df
Remove useless enum
l2ysho Feb 24, 2026
c026c58
remove unused import
l2ysho Feb 24, 2026
7c20454
lint:fix
l2ysho Feb 24, 2026
9ccf389
decouple some more packages
l2ysho Feb 24, 2026
9f838ff
lint:fix
l2ysho Feb 24, 2026
765bfea
lint:fix
l2ysho Feb 24, 2026
d15376a
lint:fix
l2ysho Feb 24, 2026
6bc344d
fix warningOnce spawning multiple childs
l2ysho Feb 25, 2026
1957a9c
lint:fix
l2ysho Feb 25, 2026
60e3c30
revert configuration changes
l2ysho Feb 25, 2026
8cc7216
update tests
l2ysho Feb 25, 2026
b56653a
cleanup test
l2ysho Feb 26, 2026
5b81475
refactor(log): rename internal logger methods
l2ysho Feb 27, 2026
7b3c158
refactor _log to log + remove loggerProvider from Conf + cleanup
l2ysho Feb 27, 2026
ee32484
lint:fix
l2ysho Feb 27, 2026
f2944aa
lint:fix
l2ysho Feb 27, 2026
755261a
enable logger conflict
l2ysho Feb 27, 2026
a942bd2
lint fix
l2ysho Feb 27, 2026
9deb0c8
fix getLog
l2ysho Feb 27, 2026
82b688d
internal -> logWithLevel
l2ysho Feb 27, 2026
f4361bc
lint:fix
l2ysho Feb 27, 2026
4b67118
add child logger
l2ysho Feb 27, 2026
87bab27
fix tests
l2ysho Feb 27, 2026
3c8fdbb
fix tests
l2ysho Feb 27, 2026
21499bf
fix test
l2ysho Feb 27, 2026
00c5fbb
lint fix
l2ysho Feb 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/examples/file_download_stream.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { pipeline, Transform } from 'stream';

import { FileDownload, type Log } from 'crawlee';
import { FileDownload, type CrawleeLogger } from 'crawlee';

// A sample Transform stream logging the download progress.
function createProgressTracker({ url, log, totalBytes }: { url: URL; log: Log; totalBytes: number }) {
function createProgressTracker({ url, log, totalBytes }: { url: URL; log: CrawleeLogger; totalBytes: number }) {
let downloadedBytes = 0;

return new Transform({
Expand Down
43 changes: 28 additions & 15 deletions packages/basic-crawler/src/internals/basic-crawler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type {
AddRequestsBatchedResult,
AutoscaledPoolOptions,
Configuration,
CrawleeLogger,
CrawlingContext,
DatasetExportOptions,
EnqueueLinksOptions,
Expand Down Expand Up @@ -40,6 +41,7 @@ import {
EnqueueStrategy,
EventType,
KeyValueStore,
LogLevel,
mergeCookies,
NonRetryableError,
purgeDefaultStorages,
Expand Down Expand Up @@ -77,8 +79,6 @@ import { getDomain } from 'tldts';
import type { ReadonlyDeep, SetRequired } from 'type-fest';

import { LruCache } from '@apify/datastructures';
import type { Log } from '@apify/log';
import defaultLog, { LogLevel } from '@apify/log';
import { addTimeoutToPromise, TimeoutError, tryCancel } from '@apify/timeout';
import { cryptoRandomObjectId } from '@apify/utilities';

Expand Down Expand Up @@ -371,7 +371,7 @@ export interface BasicCrawlerOptions<
onSkippedRequest?: SkippedRequestCallback;

/** @internal */
log?: Log;
log?: CrawleeLogger;

/**
* Enables experimental features of Crawlee, which can alter the behavior of the crawler.
Expand Down Expand Up @@ -415,6 +415,12 @@ export interface BasicCrawlerOptions<
*/
eventManager?: EventManager;

/**
* Custom logger to use for this crawler.
* If provided, the crawler will use its own ServiceLocator instance instead of the global one.
*/
logger?: CrawleeLogger;

/**
* A unique identifier for the crawler instance. This ID is used to isolate the state returned by
* {@apilink BasicCrawler.useState|`crawler.useState()`} from other crawler instances.
Expand Down Expand Up @@ -586,7 +592,7 @@ export class BasicCrawler<
running = false;
hasFinishedBefore = false;

readonly log: Log;
readonly log: CrawleeLogger;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be a get property that reaches into serviceLocator instead? Just to avoid storing stuff in random places.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This basically follows how it worked before (the `log` property stores a child logger).
before:

  • log -> child instance with prefix -> log = defaultLog.child({ prefix: this.constructor.name })
  • parent call for warning once -> defaultLog.warningOnce()

now:

  • log -> log = serviceLocator.getLogger().child({ prefix: this.constructor.name })
  • parent call for warning once -> serviceLocator.getLogger().warningOnce()

Only now there is an internal `log` property and a `logger` property for the scopedServiceLocator; I think we do not need both 🤔

protected requestHandler!: RequestHandler<ExtendedContext>;
protected errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
protected failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
Expand Down Expand Up @@ -651,6 +657,7 @@ export class BasicCrawler<
configuration: ow.optional.object,
storageClient: ow.optional.object,
eventManager: ow.optional.object,
logger: ow.optional.object,

// AutoscaledPool shorthands
minConcurrency: ow.optional.number,
Expand Down Expand Up @@ -695,6 +702,7 @@ export class BasicCrawler<
configuration,
storageClient,
eventManager,
logger,

// AutoscaledPool shorthands
minConcurrency,
Expand All @@ -714,7 +722,7 @@ export class BasicCrawler<
httpClient,

// internal
log = defaultLog.child({ prefix: this.constructor.name }),
log: logOverride,
experiments = {},

id,
Expand All @@ -730,15 +738,18 @@ export class BasicCrawler<
if (
storageClient ||
eventManager ||
logger ||
(configuration !== undefined && configuration !== serviceLocator.getConfiguration())
) {
const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient);
const scopedServiceLocator = new ServiceLocator(configuration, eventManager, storageClient, logger);
serviceLocatorScope = bindMethodsToServiceLocator(scopedServiceLocator, this);
}

try {
serviceLocatorScope.enterScope();

const log = logOverride ?? serviceLocator.getLogger().child({ prefix: this.constructor.name });

// Store whether the user explicitly provided an ID
this.hasExplicitId = id !== undefined;
// Store the user-provided ID, or generate a unique one for tracking purposes (not for state key)
Expand Down Expand Up @@ -833,7 +844,7 @@ export class BasicCrawler<
this.sameDomainDelayMillis = sameDomainDelaySecs * 1000;
this.maxSessionRotations = maxSessionRotations;
this.stats = new Statistics({
logMessage: `${log.getOptions().prefix} request statistics:`,
logMessage: `${log.getOptions().prefix ?? this.constructor.name} request statistics:`,
log,
...(this.hasExplicitId ? { id: this.crawlerId } : {}),
...statisticsOptions,
Expand Down Expand Up @@ -944,7 +955,7 @@ export class BasicCrawler<
async setStatusMessage(message: string, options: SetStatusMessageOptions = {}) {
const data =
options.isStatusMessageTerminal != null ? { terminal: options.isStatusMessageTerminal } : undefined;
this.log.internal(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data);
this.log.logWithLevel(LogLevel[(options.level as 'DEBUG') ?? 'DEBUG'], message, data);

const client = serviceLocator.getStorageClient();

Expand Down Expand Up @@ -1099,7 +1110,7 @@ export class BasicCrawler<
retryHistogram: this.stats.requestRetryHistogram,
...finalStats,
};
this.log.info('Final request statistics:', stats);
this.log.info('Final request statistics:', stats as unknown as Record<string, unknown>);

if (this.stats.errorTracker.total !== 0) {
const prettify = ([count, info]: [number, string[]]) =>
Expand Down Expand Up @@ -1193,12 +1204,14 @@ export class BasicCrawler<
BasicCrawler.useStateCrawlerIds.add(this.crawlerId);

if (BasicCrawler.useStateCrawlerIds.size > 1) {
defaultLog.warningOnce(
'Multiple crawler instances are calling useState() without an explicit `id` option. \n' +
'This means they will share the same state object, which is likely unintended. \n' +
'To fix this, provide a unique `id` option to each crawler instance. \n' +
'Example: new BasicCrawler({ id: "my-crawler-1", ... })',
);
serviceLocator
.getLogger()
.warningOnce(
'Multiple crawler instances are calling useState() without an explicit `id` option. \n' +
'This means they will share the same state object, which is likely unintended. \n' +
'To fix this, provide a unique `id` option to each crawler instance. \n' +
'Example: new BasicCrawler({ id: "my-crawler-1", ... })',
);
}

return kvs.getAutoSavedValue<State>(BasicCrawler.CRAWLEE_STATE_KEY, defaultValue);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { type CrawleeLogger, serviceLocator } from '@crawlee/core';
import type { Cookie, Dictionary } from '@crawlee/types';
import { nanoid } from 'nanoid';
import { TypedEmitter } from 'tiny-typed-emitter';
Expand All @@ -6,7 +7,6 @@ import { tryCancel } from '@apify/timeout';

import { BROWSER_CONTROLLER_EVENTS } from '../events.js';
import type { LaunchContext } from '../launch-context.js';
import { log } from '../logger.js';
import type { UnwrapPromise } from '../utils.js';
import type { BrowserPlugin, CommonBrowser, CommonLibrary } from './browser-plugin.js';

Expand Down Expand Up @@ -40,6 +40,7 @@ export abstract class BrowserController<
NewPageResult = UnwrapPromise<ReturnType<LaunchResult['newPage']>>,
> extends TypedEmitter<BrowserControllerEvents<Library, LibraryOptions, LaunchResult, NewPageOptions, NewPageResult>> {
id = nanoid();
protected log!: CrawleeLogger;

/**
* The `BrowserPlugin` instance used to launch the browser.
Expand Down Expand Up @@ -90,6 +91,7 @@ export abstract class BrowserController<

constructor(browserPlugin: BrowserPlugin<Library, LibraryOptions, LaunchResult, NewPageOptions, NewPageResult>) {
super();
this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });
this.browserPlugin = browserPlugin;
}

Expand Down Expand Up @@ -136,14 +138,14 @@ export abstract class BrowserController<
// TODO: shouldn't this go in a finally instead?
this.isActive = false;
} catch (error) {
log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
this.log.debug(`Could not close browser.\nCause: ${(error as Error).message}`, { id: this.id });
}

this.emit(BROWSER_CONTROLLER_EVENTS.BROWSER_CLOSED, this);

setTimeout(() => {
this._kill().catch((err) => {
log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id });
this.log.debug(`Could not kill browser.\nCause: ${err.message}`, { id: this.id });
});
}, PROCESS_KILL_TIMEOUT_MILLIS);
}
Expand Down
4 changes: 3 additions & 1 deletion packages/browser-pool/src/abstract-classes/browser-plugin.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { CriticalError } from '@crawlee/core';
import { type CrawleeLogger, CriticalError, serviceLocator } from '@crawlee/core';
import type { Dictionary } from '@crawlee/types';
import merge from 'lodash.merge';

Expand Down Expand Up @@ -105,6 +105,7 @@ export abstract class BrowserPlugin<
NewPageResult = UnwrapPromise<ReturnType<LaunchResult['newPage']>>,
> {
name = this.constructor.name;
protected log!: CrawleeLogger;
library: Library;
launchOptions: LibraryOptions;
proxyUrl?: string;
Expand All @@ -121,6 +122,7 @@ export abstract class BrowserPlugin<
browserPerProxy = false,
} = options;

this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });
this.library = library;
this.launchOptions = launchOptions;
this.proxyUrl = proxyUrl && new URL(proxyUrl).href.slice(0, -1);
Expand Down
17 changes: 9 additions & 8 deletions packages/browser-pool/src/browser-pool.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { TieredProxy } from '@crawlee/core';
import { type CrawleeLogger, serviceLocator, type TieredProxy } from '@crawlee/core';
import type { BrowserFingerprintWithHeaders } from 'fingerprint-generator';
import { FingerprintGenerator } from 'fingerprint-generator';
import { FingerprintInjector } from 'fingerprint-injector';
Expand All @@ -20,7 +20,6 @@ import {
} from './fingerprinting/hooks.js';
import type { FingerprintGeneratorOptions } from './fingerprinting/types.js';
import type { LaunchContext } from './launch-context.js';
import { log } from './logger.js';
import type { InferBrowserPluginArray, UnwrapPromise } from './utils.js';

const PAGE_CLOSE_KILL_TIMEOUT_MILLIS = 1000;
Expand Down Expand Up @@ -334,9 +333,11 @@ export class BrowserPool<
private browserRetireInterval?: NodeJS.Timeout;

private limiter = pLimit(1);
private log!: CrawleeLogger;

constructor(options: Options & BrowserPoolHooks<BrowserControllerReturn, LaunchContextReturn, PageReturn>) {
super();
this.log = serviceLocator.getLogger().child({ prefix: 'BrowserPool' });

this.browserKillerInterval!.unref();

Expand Down Expand Up @@ -708,7 +709,7 @@ export class BrowserPool<
throw err;
}

log.debug('Launched new browser.', { id: browserController.id });
this.log.debug('Launched new browser.', { id: browserController.id });
browserController.proxyTier = proxyTier;
browserController.proxyUrl = proxyUrl;

Expand All @@ -719,7 +720,7 @@ export class BrowserPool<
} catch (err) {
this.startingBrowserControllers.delete(browserController);
browserController.close().catch((closeErr) => {
log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, {
this.log.error(`Could not close browser whose post-launch hooks failed.\nCause:${closeErr.message}`, {
id: browserController.id,
});
});
Expand Down Expand Up @@ -774,15 +775,15 @@ export class BrowserPool<

if (isBrowserIdle || isBrowserEmpty) {
const { id } = controller;
log.debug('Closing retired browser.', { id });
this.log.debug('Closing retired browser.', { id });
await controller.close();
this.retiredBrowserControllers.delete(controller);
closedBrowserIds.push(id);
}
}

if (closedBrowserIds.length) {
log.debug('Closed retired browsers.', {
this.log.debug('Closed retired browsers.', {
count: closedBrowserIds.length,
closedBrowserIds,
});
Expand All @@ -798,7 +799,7 @@ export class BrowserPool<
await this._executeHooks(this.prePageCloseHooks, page, browserController);

await originalPageClose.apply(page, args).catch((err: Error) => {
log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id });
this.log.debug(`Could not close page.\nCause:${err.message}`, { id: browserController.id });
});

await this._executeHooks(this.postPageCloseHooks, pageId, browserController);
Expand All @@ -821,7 +822,7 @@ export class BrowserPool<
// Run this with a delay, otherwise page.close()
// might fail with "Protocol error (Target.closeTarget): Target closed."
setTimeout(() => {
log.debug('Closing retired browser because it has no active pages', { id: browserController.id });
this.log.debug('Closing retired browser because it has no active pages', { id: browserController.id });
void browserController.close().finally(() => {
this.retiredBrowserControllers.delete(browserController);
});
Expand Down
5 changes: 0 additions & 5 deletions packages/browser-pool/src/logger.ts

This file was deleted.

3 changes: 1 addition & 2 deletions packages/browser-pool/src/playwright/playwright-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { BrowserPlugin } from '../abstract-classes/browser-plugin.js';
import { anonymizeProxySugar } from '../anonymize-proxy.js';
import type { createProxyServerForContainers } from '../container-proxy-server.js';
import type { LaunchContext } from '../launch-context.js';
import { log } from '../logger.js';
import { getLocalProxyAddress } from '../proxy-server.js';
import type { SafeParameters } from '../utils.js';
import { PlaywrightBrowser as PlaywrightBrowserWithPersistentContext } from './playwright-browser.js';
Expand Down Expand Up @@ -82,7 +81,7 @@ export class PlaywrightPlugin extends BrowserPlugin<
this._browserVersion = inactiveBrowser.version();

inactiveBrowser.close().catch((error) => {
log.exception(error, 'Failed to close browser.');
this.log.exception(error, 'Failed to close browser.');
});
}

Expand Down
7 changes: 3 additions & 4 deletions packages/browser-pool/src/puppeteer/puppeteer-controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { tryCancel } from '@apify/timeout';

import { BrowserController } from '../abstract-classes/browser-controller.js';
import { anonymizeProxySugar } from '../anonymize-proxy.js';
import { log } from '../logger.js';

export interface PuppeteerNewPageOptions extends PuppeteerTypes.BrowserContextOptions {
proxyUsername?: string;
Expand Down Expand Up @@ -88,7 +87,7 @@ export class PuppeteerController extends BrowserController<
try {
await context.close();
} catch (error: any) {
log.exception(error, 'Failed to close context.');
this.log.exception(error, 'Failed to close context.');
} finally {
await close();
}
Expand Down Expand Up @@ -120,7 +119,7 @@ export class PuppeteerController extends BrowserController<
const browserProcess = this.browser.process();

if (!browserProcess) {
log.debug('Browser was connected using the `puppeteer.connect` method no browser to kill.');
this.log.debug('Browser was connected using the `puppeteer.connect` method no browser to kill.');
return;
}

Expand All @@ -135,7 +134,7 @@ export class PuppeteerController extends BrowserController<
await this.browser.close();
clearTimeout(timeout);
} catch (error) {
log.debug('Browser was already killed.', { error });
this.log.debug('Browser was already killed.', { error });
}
}

Expand Down
5 changes: 2 additions & 3 deletions packages/browser-pool/src/puppeteer/puppeteer-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import type * as PuppeteerTypes from 'puppeteer';
import { BrowserPlugin } from '../abstract-classes/browser-plugin.js';
import { anonymizeProxySugar } from '../anonymize-proxy.js';
import type { LaunchContext } from '../launch-context.js';
import { log } from '../logger.js';
import { noop } from '../utils.js';
import type { PuppeteerNewPageOptions } from './puppeteer-controller.js';
import { PuppeteerController } from './puppeteer-controller.js';
Expand Down Expand Up @@ -95,12 +94,12 @@ export class PuppeteerPlugin extends BrowserPlugin<

if (page) {
page.on('error', (error) => {
log.exception(error, 'Page crashed.');
this.log.exception(error, 'Page crashed.');
page.close().catch(noop);
});
}
} catch (error: any) {
log.exception(error, 'Failed to retrieve page from target.');
this.log.exception(error, 'Failed to retrieve page from target.');
}
});

Expand Down
Loading