From b68dd0edc939f28525344457651491283f7834dc Mon Sep 17 00:00:00 2001
From: "Herman J. Radtke III"
Date: Sat, 9 Nov 2019 11:36:29 -0800
Subject: [PATCH] DbUrlList now honors recrawlInMs option. Fixes #40

---
 lib/DbUrlList.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/DbUrlList.js b/lib/DbUrlList.js
index b2bb35f..6fbdd69 100644
--- a/lib/DbUrlList.js
+++ b/lib/DbUrlList.js
@@ -179,7 +179,7 @@ DbUrlList.prototype._calcNextRetryDate = function (numErrors) {
     // If we want to schedule a crawl now, we subtract a random number of
     // seconds. This ensures the order we crawl URLs is random; otherwise, if
     // we parse a sitemap, we could get stuck crawling one host for hours.
-    delay = - Math.random() * YEAR_MS;
+    delay = - Math.random() * this._recrawlInMs;
   } else {
     delay = this._initialRetryTime * Math.pow(2, numErrors - 1);
   }