From 98b8f69d4118d9aa1ea0d430ed2564e1241cc220 Mon Sep 17 00:00:00 2001 From: touchRED Date: Tue, 21 Mar 2023 14:45:46 -0400 Subject: [PATCH 1/6] fix: replace moment with dayjs --- package.json | 4 +--- src/cleaners/date-published.js | 30 +++++++++++++++++---------- src/cleaners/date-published.test.js | 32 ++++++++++++++--------------- yarn.lock | 21 +++++-------------- 4 files changed, 41 insertions(+), 46 deletions(-) diff --git a/package.json b/package.json index 11b49c9e6..f5df5be41 100644 --- a/package.json +++ b/package.json @@ -118,13 +118,11 @@ "@postlight/ci-failed-test-reporter": "^1.0", "browser-request": "github:postlight/browser-request#feat-add-headers-to-response", "cheerio": "^0.22.0", + "dayjs": "^1.11.7", "difflib": "github:postlight/difflib.js", "ellipsize": "0.1.0", "iconv-lite": "0.5.0", "jquery": "^3.5.0", - "moment": "^2.23.0", - "moment-parseformat": "3.0.0", - "moment-timezone": "0.5.37", "postman-request": "^2.88.1-postman.31", "string-direction": "^0.1.2", "turndown": "^7.1.1", diff --git a/src/cleaners/date-published.js b/src/cleaners/date-published.js index 1e214ab08..70017782c 100644 --- a/src/cleaners/date-published.js +++ b/src/cleaners/date-published.js @@ -1,8 +1,8 @@ -import moment from 'moment-timezone'; -import parseFormat from 'moment-parseformat'; -// Is there a compelling reason to use moment here? -// Mostly only being used for the isValid() method, -// but could just check for 'Invalid Date' string. +import dayjs from 'dayjs'; +import customParseFormat from 'dayjs/plugin/customParseFormat'; +import utc from 'dayjs/plugin/utc'; +import timezonePlugin from 'dayjs/plugin/timezone'; +import advancedFormat from 'dayjs/plugin/advancedFormat'; import { MS_DATE_STRING, @@ -16,6 +16,11 @@ import { TIME_WITH_OFFSET_RE, } from './constants'; +dayjs.extend(customParseFormat); +dayjs.extend(utc); +dayjs.extend(timezonePlugin); +dayjs.extend(advancedFormat); + export function cleanDateString(dateString) { return (dateString.match(SPLIT_DATE_STRING) || []) .join(' ') @@ -27,21 +32,24 @@ export function cleanDateString(dateString) { export function createDate(dateString, timezone, format) { if (TIME_WITH_OFFSET_RE.test(dateString)) { - return moment(new Date(dateString)); + return dayjs(new Date(dateString)); } if (TIME_AGO_STRING.test(dateString)) { const fragments = TIME_AGO_STRING.exec(dateString); - return moment().subtract(fragments[1], fragments[2]); + return dayjs().subtract(fragments[1], fragments[2]); } if (TIME_NOW_STRING.test(dateString)) { - return moment(); + return dayjs(); } - return timezone - ? moment.tz(dateString, format || parseFormat(dateString), timezone) - : moment(dateString, format || parseFormat(dateString)); + if (timezone) { + return format + ? dayjs.tz(dateString, format, timezone) + : dayjs.tz(new Date(dateString), timezone); + } + return format ? dayjs(dateString, format) : dayjs(new Date(dateString)); } // Take a date published string, and hopefully return a date out of diff --git a/src/cleaners/date-published.test.js b/src/cleaners/date-published.test.js index cbeb771c8..47a462aad 100644 --- a/src/cleaners/date-published.test.js +++ b/src/cleaners/date-published.test.js @@ -1,5 +1,5 @@ import assert from 'assert'; -import moment from 'moment-timezone'; +import dayjs from 'dayjs'; import cleanDatePublished, { cleanDateString } from './date-published'; @@ -7,7 +7,7 @@ describe('cleanDatePublished(dateString)', () => { it('returns a date', () => { const datePublished = cleanDatePublished('published: 1/1/2020'); - assert.equal(datePublished, moment('1/1/2020', 'MM/DD/YYYY').toISOString()); + assert.equal(datePublished, dayjs('1/1/2020', 'M/D/YYYY').toISOString()); }); it('returns null if date is invalid', () => { @@ -28,37 +28,37 @@ describe('cleanDatePublished(dateString)', () => { it('accepts a custom date format', () => { // The JS date parser is forgiving, but // it needs am/pm separated from a time - const datePublished = cleanDatePublished('Mon Aug 03 12:45:00 EDT 2015', { + const datePublished = cleanDatePublished('Aug 03 12:45:00 EDT 2015', { timezone: 'America/New_York', - format: 'ddd MMM DD HH:mm:ss zz YYYY', + format: 'MMM DD HH:mm:ss z YYYY', }); assert.equal(datePublished, '2015-08-03T16:45:00.000Z'); }); it('can handle dates formatted as "[just|right] now"', () => { const date1 = cleanDatePublished('now'); - const newDate1 = moment(date1) + const newDate1 = dayjs(date1) .format() .split('T')[0]; - const expectedDate1 = moment() + const expectedDate1 = dayjs() .format() .split('T')[0]; assert.equal(newDate1, expectedDate1); const date2 = cleanDatePublished('just now'); - const newDate2 = moment(date2) + const newDate2 = dayjs(date2) .format() .split('T')[0]; - const expectedDate2 = moment() + const expectedDate2 = dayjs() .format() .split('T')[0]; assert.equal(newDate2, expectedDate2); const date3 = cleanDatePublished('right now'); - const newDate3 = moment(date3) + const newDate3 = dayjs(date3) .format() .split('T')[0]; - const expectedDate3 = moment() + const expectedDate3 = dayjs() .format() .split('T')[0]; assert.equal(newDate3, expectedDate3); @@ -69,30 +69,30 @@ describe('cleanDatePublished(dateString)', () => { // "X days ago" will not be accurate down to the exact time // "X months ago" will not be accurate down to the exact day const date1 = cleanDatePublished('1 hour ago'); - const newDate1 = moment(date1) + const newDate1 = dayjs(date1) .format() .split('T')[0]; - const expectedDate1 = moment() + const expectedDate1 = dayjs() .subtract(1, 'hour') .format() .split('T')[0]; assert.equal(newDate1, expectedDate1); const date2 = cleanDatePublished('5 days ago'); - const newDate2 = moment(date2) + const newDate2 = dayjs(date2) .format() .split('T')[0]; - const expectedDate2 = moment() + const expectedDate2 = dayjs() .subtract(5, 'days') .format() .split('T')[0]; assert.equal(newDate2, expectedDate2); const date3 = cleanDatePublished('10 months ago'); - const newDate3 = moment(date3) + const newDate3 = dayjs(date3) .format() .split('T')[0]; - const expectedDate3 = moment() + const expectedDate3 = dayjs() .subtract(10, 'months') .format() .split('T')[0]; diff --git a/yarn.lock b/yarn.lock index a23831dd8..492b03ad8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2577,6 +2577,11 @@ date-now@^0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/date-now/-/date-now-0.1.4.tgz#eaf439fd4d4848ad74e5cc7dbef200672b9e345b" +dayjs@^1.11.7: + version "1.11.7" + resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.7.tgz#4b296922642f70999544d1144a2c25730fce63e2" + integrity sha512-+Yw9U6YO5TQohxLcIkrXBeY73WP3ejHWVvx8XCk3gxvQDCTEmS48ZrSZCKciI7Bhl/uCMyxYtE9UqRILmFphkQ== + debug@2.6.9, debug@^2.1.2, debug@^2.3.3, debug@^2.6.8, debug@^2.6.9: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -6132,22 +6137,6 @@ module-deps@^6.0.0: through2 "^2.0.0" xtend "^4.0.0" -moment-parseformat@3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/moment-parseformat/-/moment-parseformat-3.0.0.tgz#3a1dc438b4bc073b7e93cc298cfb6c5daac26dba" - -moment-timezone@0.5.37: - version "0.5.37" - resolved "https://registry.yarnpkg.com/moment-timezone/-/moment-timezone-0.5.37.tgz#adf97f719c4e458fdb12e2b4e87b8bec9f4eef1e" - integrity sha512-uEDzDNFhfaywRl+vwXxffjjq1q0Vzr+fcQpQ1bU0kbzorfS7zVtZnCnGc8mhWmF39d4g4YriF6kwA75mJKE/Zg== - dependencies: - moment ">= 2.9.0" - -"moment@>= 2.9.0", moment@^2.23.0: - version "2.29.4" - resolved "https://registry.yarnpkg.com/moment/-/moment-2.29.4.tgz#3dbe052889fe7c1b2ed966fcb3a77328964ef108" - integrity sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w== - ms@0.7.2: version "0.7.2" resolved "https://registry.yarnpkg.com/ms/-/ms-0.7.2.tgz#ae25cf2512b3885a1d95d7f037868d8431124765" From 6a5f892c68ac1dd155fbd380e55ab548e535737d Mon Sep 17 00:00:00 2001 From: touchRED Date: Mon, 27 Mar 2023 15:51:01 -0400 Subject: [PATCH 2/6] fix: update tests to remove dayjs where possible, update formats --- package.json | 4 +--- src/cleaners/date-published.js | 16 +++++++++++----- .../custom/clinicaltrials.gov/index.test.js | 3 +-- src/extractors/custom/fortune.com/index.js | 2 -- src/extractors/custom/genius.com/index.test.js | 4 +--- .../custom/news.nationalgeographic.com/index.js | 3 +-- .../news.nationalgeographic.com/index.test.js | 2 +- src/extractors/custom/people.com/index.test.js | 7 +------ .../custom/pitchfork.com/index.test.js | 6 +----- .../custom/takagi-hiromitsu.jp/index.test.js | 4 +--- .../custom/www.chicagotribune.com/index.js | 1 + .../custom/www.chicagotribune.com/index.test.js | 6 +----- src/extractors/custom/www.infoq.com/index.js | 2 +- src/extractors/custom/www.macrumors.com/index.js | 3 +-- src/extractors/custom/www.msn.com/index.test.js | 4 +--- .../www.nationalgeographic.com/index.test.js | 6 +----- src/extractors/custom/www.nbcnews.com/index.js | 2 +- .../custom/www.nbcnews.com/index.test.js | 2 +- .../custom/www.nydailynews.com/index.test.js | 6 +----- src/extractors/custom/www.ossnews.jp/index.js | 2 +- src/extractors/custom/www.phoronix.com/index.js | 2 +- .../custom/www.phoronix.com/index.test.js | 2 +- .../custom/www.politico.com/index.test.js | 6 +----- .../custom/www.prospectmagazine.co.uk/index.js | 2 -- .../custom/www.reddit.com/index.test.js | 10 ++++------ src/extractors/custom/www.spektrum.de/index.js | 1 + .../custom/www.today.com/index.test.js | 6 +----- .../generic/date-published/extractor.test.js | 6 +++--- src/extractors/generic/index.test.js | 4 ++-- 29 files changed, 43 insertions(+), 81 deletions(-) diff --git a/package.json b/package.json index f5df5be41..2c06cd91c 100644 --- a/package.json +++ b/package.json @@ -132,7 +132,6 @@ }, "bundleDependencies": [ "jquery", - "moment-timezone", "browser-request" ], "browser": { @@ -140,8 +139,7 @@ "cheerio": "./src/shims/cheerio-query", "jquery": "./node_modules/jquery/dist/jquery.min.js", "postman-request": "browser-request", - "iconv-lite": "./src/shims/iconv-lite", - "moment-timezone": "./node_modules/moment-timezone/builds/moment-timezone-with-data-2012-2022.min.js" + "iconv-lite": "./src/shims/iconv-lite" }, "husky": { "hooks": { diff --git a/src/cleaners/date-published.js b/src/cleaners/date-published.js index 70017782c..001f9e328 100644 --- a/src/cleaners/date-published.js +++ b/src/cleaners/date-published.js @@ -45,11 +45,17 @@ export function createDate(dateString, timezone, format) { } if (timezone) { - return format - ? dayjs.tz(dateString, format, timezone) - : dayjs.tz(new Date(dateString), timezone); + try { + return format + ? dayjs.tz(dateString, format, timezone) + : dayjs.tz(dayjs(dateString).format('YYYY-MM-DD HH:mm:ss'), timezone); + } catch (error) { + // return an intentionally invalid dayjs object, + // in case the input needs to be cleaned first + return dayjs(''); + } } - return format ? dayjs(dateString, format) : dayjs(new Date(dateString)); + return format ? dayjs(dateString, format) : dayjs(dateString); } // Take a date published string, and hopefully return a date out of @@ -70,7 +76,7 @@ export default function cleanDatePublished( if (!date.isValid()) { dateString = cleanDateString(dateString); - date = createDate(dateString, timezone, format); + date = createDate(dateString, timezone); } return date.isValid() ? date.toISOString() : null; diff --git a/src/extractors/custom/clinicaltrials.gov/index.test.js b/src/extractors/custom/clinicaltrials.gov/index.test.js index 73f881fb2..ea16bb4a5 100644 --- a/src/extractors/custom/clinicaltrials.gov/index.test.js +++ b/src/extractors/custom/clinicaltrials.gov/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -59,7 +58,7 @@ describe('ClinicaltrialsGovExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(moment(date_published).format('YYYY-MM-DD'), '2018-11-21'); + assert.equal(date_published, '2018-11-21T05:00:00.000Z'); }); it('returns the content', async () => { diff --git a/src/extractors/custom/fortune.com/index.js b/src/extractors/custom/fortune.com/index.js index 199cf24af..1fc082803 100644 --- a/src/extractors/custom/fortune.com/index.js +++ b/src/extractors/custom/fortune.com/index.js @@ -11,8 +11,6 @@ export const FortuneComExtractor = { date_published: { selectors: ['.MblGHNMJ'], - - timezone: 'UTC', }, lead_image_url: { diff --git a/src/extractors/custom/genius.com/index.test.js b/src/extractors/custom/genius.com/index.test.js index 0468f65fe..ebefdd94d 100644 --- a/src/extractors/custom/genius.com/index.test.js +++ b/src/extractors/custom/genius.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -51,11 +50,10 @@ describe('GeniusComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/genius.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '1984-06-25'); + assert.equal(date_published, '1984-06-25T04:00:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/news.nationalgeographic.com/index.js b/src/extractors/custom/news.nationalgeographic.com/index.js index 5c8ed8075..b0e02d4a4 100644 --- a/src/extractors/custom/news.nationalgeographic.com/index.js +++ b/src/extractors/custom/news.nationalgeographic.com/index.js @@ -11,8 +11,7 @@ export const NewsNationalgeographicComExtractor = { date_published: { selectors: [['meta[name="article:published_time"]', 'value']], - format: 'ddd MMM DD HH:mm:ss zz YYYY', - timezone: 'EST', + timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/news.nationalgeographic.com/index.test.js b/src/extractors/custom/news.nationalgeographic.com/index.test.js index 11b400d9d..863cfd6c5 100644 --- a/src/extractors/custom/news.nationalgeographic.com/index.test.js +++ b/src/extractors/custom/news.nationalgeographic.com/index.test.js @@ -49,7 +49,7 @@ describe('NewsNationalgeographicComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2015-08-03T17:45:00.000Z'); + assert.equal(date_published, '2015-08-03T16:45:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/people.com/index.test.js b/src/extractors/custom/people.com/index.test.js index 8559418c6..4de9dae7d 100644 --- a/src/extractors/custom/people.com/index.test.js +++ b/src/extractors/custom/people.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,9 @@ describe('PeopleComExtractor', () => { // in ./src/extractors/custom/people.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; - // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-12'); + assert.equal(date_published, '2016-12-12T14:22:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/pitchfork.com/index.test.js b/src/extractors/custom/pitchfork.com/index.test.js index 2da225f7d..6e8c0cdb2 100644 --- a/src/extractors/custom/pitchfork.com/index.test.js +++ b/src/extractors/custom/pitchfork.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -41,11 +40,8 @@ describe('PitchforkComExtractor', () => { it('returns the date_published', async () => { const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; - assert.equal(new_date_published, '2019-06-07'); + assert.equal(date_published, '2019-06-07T04:00:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/takagi-hiromitsu.jp/index.test.js b/src/extractors/custom/takagi-hiromitsu.jp/index.test.js index 5c6a008c1..6207680c0 100644 --- a/src/extractors/custom/takagi-hiromitsu.jp/index.test.js +++ b/src/extractors/custom/takagi-hiromitsu.jp/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -57,11 +56,10 @@ describe('TakagihiromitsuJpExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/takagi-hiromitsu.jp/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '2019-02-17'); + assert.equal(date_published, '2019-02-17T14:34:06.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.chicagotribune.com/index.js b/src/extractors/custom/www.chicagotribune.com/index.js index edc63d59c..df4c944a5 100644 --- a/src/extractors/custom/www.chicagotribune.com/index.js +++ b/src/extractors/custom/www.chicagotribune.com/index.js @@ -11,6 +11,7 @@ export const WwwChicagotribuneComExtractor = { date_published: { selectors: ['time'], + timezone: 'America/Chicago', }, lead_image_url: { diff --git a/src/extractors/custom/www.chicagotribune.com/index.test.js b/src/extractors/custom/www.chicagotribune.com/index.test.js index 4a44437da..303ece3ff 100644 --- a/src/extractors/custom/www.chicagotribune.com/index.test.js +++ b/src/extractors/custom/www.chicagotribune.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('WwwChicagotribuneComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.chicagotribune.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-13'); + assert.equal(date_published, '2016-12-13T21:45:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.infoq.com/index.js b/src/extractors/custom/www.infoq.com/index.js index 710f9d0ac..92053900b 100644 --- a/src/extractors/custom/www.infoq.com/index.js +++ b/src/extractors/custom/www.infoq.com/index.js @@ -11,7 +11,7 @@ export const WwwInfoqComExtractor = { date_published: { selectors: ['.article__readTime.date'], - format: 'YYYY年MM月DD日', + format: 'YYYY[年]M[月]D[日]', timezone: 'Asia/Tokyo', }, diff --git a/src/extractors/custom/www.macrumors.com/index.js b/src/extractors/custom/www.macrumors.com/index.js index 4ad307afc..6b7e27c29 100644 --- a/src/extractors/custom/www.macrumors.com/index.js +++ b/src/extractors/custom/www.macrumors.com/index.js @@ -11,8 +11,7 @@ export const WwwMacrumorsComExtractor = { date_published: { selectors: [['time', 'datetime']], - - timezone: 'America/Los_Angeles', + // timezone: 'America/Los_Angeles', }, dek: { diff --git a/src/extractors/custom/www.msn.com/index.test.js b/src/extractors/custom/www.msn.com/index.test.js index f73d6b7a5..c1f301572 100644 --- a/src/extractors/custom/www.msn.com/index.test.js +++ b/src/extractors/custom/www.msn.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -58,11 +57,10 @@ describe('MSNExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.msn.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published).format(); // Update these values with the expected values from // the article. - assert.equal(newDatePublished.split('T')[0], '2016-09-21'); + assert.equal(date_published.split('T')[0], '2016-09-21'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.nationalgeographic.com/index.test.js b/src/extractors/custom/www.nationalgeographic.com/index.test.js index a59fb7885..966160019 100644 --- a/src/extractors/custom/www.nationalgeographic.com/index.test.js +++ b/src/extractors/custom/www.nationalgeographic.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -44,13 +43,10 @@ describe('WwwNationalgeographicComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.nationalgeographic.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-15'); + assert.equal(date_published.split('T')[0], '2016-12-15'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.nbcnews.com/index.js b/src/extractors/custom/www.nbcnews.com/index.js index 1b9f6c732..d79b19e97 100644 --- a/src/extractors/custom/www.nbcnews.com/index.js +++ b/src/extractors/custom/www.nbcnews.com/index.js @@ -19,7 +19,7 @@ export const WwwNbcnewsComExtractor = { '.flag_article-wrapper time', ], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, lead_image_url: { diff --git a/src/extractors/custom/www.nbcnews.com/index.test.js b/src/extractors/custom/www.nbcnews.com/index.test.js index 704e277ec..988ba73ab 100644 --- a/src/extractors/custom/www.nbcnews.com/index.test.js +++ b/src/extractors/custom/www.nbcnews.com/index.test.js @@ -53,7 +53,7 @@ describe('WwwNbcnewsComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2016-12-13T23:06:00.000Z'); + assert.equal(date_published, '2016-12-13T18:06:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.nydailynews.com/index.test.js b/src/extractors/custom/www.nydailynews.com/index.test.js index b87b204bb..d3ea24464 100644 --- a/src/extractors/custom/www.nydailynews.com/index.test.js +++ b/src/extractors/custom/www.nydailynews.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,14 +54,11 @@ describe('WwwNydailynewsComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.nydailynews.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-16'); + assert.equal(date_published.split('T')[0], '2016-12-16'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.ossnews.jp/index.js b/src/extractors/custom/www.ossnews.jp/index.js index 1932f833c..396ca3483 100644 --- a/src/extractors/custom/www.ossnews.jp/index.js +++ b/src/extractors/custom/www.ossnews.jp/index.js @@ -9,7 +9,7 @@ export const WwwOssnewsJpExtractor = { date_published: { selectors: ['p.fs12'], - format: 'YYYY年MM月DD日 HH:mm', + format: 'YYYY[年]M[月]D[日] HH:mm', timezone: 'Asia/Tokyo', }, diff --git a/src/extractors/custom/www.phoronix.com/index.js b/src/extractors/custom/www.phoronix.com/index.js index 02179de12..2ac2a10eb 100644 --- a/src/extractors/custom/www.phoronix.com/index.js +++ b/src/extractors/custom/www.phoronix.com/index.js @@ -12,7 +12,7 @@ export const WwwPhoronixComExtractor = { date_published: { selectors: ['.author'], // 1 June 2019 at 08:34 PM EDT - format: 'D MMMM YYYY at hh:mm', + format: 'D MMMM YYYY [at] hh:mm A', timezone: 'America/New_York', }, diff --git a/src/extractors/custom/www.phoronix.com/index.test.js b/src/extractors/custom/www.phoronix.com/index.test.js index 5e6abe6e7..d4ca6494d 100644 --- a/src/extractors/custom/www.phoronix.com/index.test.js +++ b/src/extractors/custom/www.phoronix.com/index.test.js @@ -57,7 +57,7 @@ describe('WwwPhoronixComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2019-06-01T12:34:00.000Z'); + assert.equal(date_published, '2019-06-02T00:34:00.000Z'); }); it('returns the dek', async () => { diff --git a/src/extractors/custom/www.politico.com/index.test.js b/src/extractors/custom/www.politico.com/index.test.js index 3d9e44fd1..290288c73 100644 --- a/src/extractors/custom/www.politico.com/index.test.js +++ b/src/extractors/custom/www.politico.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('PoliticoExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.politico.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-10-04'); + assert.equal(date_published.split('T')[0], '2016-10-04'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.prospectmagazine.co.uk/index.js b/src/extractors/custom/www.prospectmagazine.co.uk/index.js index 4cd13c6fd..2a66a8b44 100644 --- a/src/extractors/custom/www.prospectmagazine.co.uk/index.js +++ b/src/extractors/custom/www.prospectmagazine.co.uk/index.js @@ -11,8 +11,6 @@ export const WwwProspectmagazineCoUkExtractor = { date_published: { selectors: [['meta[name="article:published_time"]', 'value'], '.post-info'], - - timezone: 'Europe/London', }, dek: { diff --git a/src/extractors/custom/www.reddit.com/index.test.js b/src/extractors/custom/www.reddit.com/index.test.js index e845678df..7cd98d171 100644 --- a/src/extractors/custom/www.reddit.com/index.test.js +++ b/src/extractors/custom/www.reddit.com/index.test.js @@ -1,7 +1,7 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; +import dayjs from 'dayjs'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,17 +55,15 @@ describe('WwwRedditComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.reddit.com/index.js. const { date_published } = await result; - const newDatePublished = moment(date_published) - .format() - .split('T')[0]; - const expectedDate = moment() + + const expectedDate = dayjs() .subtract(4, 'years') .format() .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(newDatePublished, expectedDate); + assert.equal(date_published.split('T')[0], expectedDate); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/www.spektrum.de/index.js b/src/extractors/custom/www.spektrum.de/index.js index 88fad8d0a..f2bca07da 100644 --- a/src/extractors/custom/www.spektrum.de/index.js +++ b/src/extractors/custom/www.spektrum.de/index.js @@ -11,6 +11,7 @@ export const SpektrumExtractor = { date_published: { selectors: ['.content__meta__date'], + format: 'DD[.]MM[.]YYYY', timezone: 'Europe/Berlin', }, diff --git a/src/extractors/custom/www.today.com/index.test.js b/src/extractors/custom/www.today.com/index.test.js index beb390ace..f36dafd4f 100644 --- a/src/extractors/custom/www.today.com/index.test.js +++ b/src/extractors/custom/www.today.com/index.test.js @@ -1,7 +1,6 @@ import assert from 'assert'; import URL from 'url'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; import Mercury from 'mercury'; import getExtractor from 'extractors/get-extractor'; @@ -55,13 +54,10 @@ describe('WwwTodayComExtractor', () => { // To pass this test, fill out the date_published selector // in ./src/extractors/custom/www.today.com/index.js. const { date_published } = await result; - const new_date_published = moment(date_published) - .format() - .split('T')[0]; // Update these values with the expected values from // the article. - assert.equal(new_date_published, '2016-12-22'); + assert.equal(date_published.split('T')[0], '2016-12-22'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/generic/date-published/extractor.test.js b/src/extractors/generic/date-published/extractor.test.js index e20b6b975..4038e5e8b 100644 --- a/src/extractors/generic/date-published/extractor.test.js +++ b/src/extractors/generic/date-published/extractor.test.js @@ -1,6 +1,6 @@ import assert from 'assert'; import cheerio from 'cheerio'; -import moment from 'moment-timezone'; +import dayjs from 'dayjs'; import GenericDatePublishedExtractor from './extractor'; @@ -67,7 +67,7 @@ describe('GenericDatePublishedExtractor', () => { metaCache, }); - assert.equal(result, moment('2020-01-01', 'YYYY-MM-DD').toISOString()); + assert.equal(result, dayjs('2020-01-01', 'YYYY-MM-DD').toISOString()); }); it('extracts from url formatted /2020/jan/01', () => { @@ -83,7 +83,7 @@ describe('GenericDatePublishedExtractor', () => { metaCache, }); - assert.equal(result, moment(new Date('2020 jan 01')).toISOString()); + assert.equal(result, dayjs(new Date('2020 jan 01')).toISOString()); } }); diff --git a/src/extractors/generic/index.test.js b/src/extractors/generic/index.test.js index fcb0bffb0..13f37e01e 100644 --- a/src/extractors/generic/index.test.js +++ b/src/extractors/generic/index.test.js @@ -1,5 +1,5 @@ import assert from 'assert'; -import moment from 'moment'; +import dayjs from 'dayjs'; import GenericExtractor from './index'; @@ -18,7 +18,7 @@ describe('GenericExtractor', () => { html, metaCache: [], }); - const newDatePublished = moment(date_published).format(); + const newDatePublished = dayjs(date_published).format(); assert.equal(author, null); assert.equal( From 8ad9309e3b828ac8e9d9c9b3f1fd65feeb485506 Mon Sep 17 00:00:00 2001 From: touchRED Date: Tue, 28 Mar 2023 18:11:57 -0400 Subject: [PATCH 3/6] fix: update some extractors to remove unnecessary timezones --- src/extractors/custom/www.mentalfloss.com/index.js | 2 +- src/extractors/custom/www.newyorker.com/index.js | 2 +- src/extractors/custom/www.rawstory.com/index.js | 2 +- src/extractors/custom/www.rollingstone.com/index.js | 2 +- src/extractors/custom/www.si.com/index.js | 2 +- src/extractors/custom/www.usmagazine.com/index.js | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/extractors/custom/www.mentalfloss.com/index.js b/src/extractors/custom/www.mentalfloss.com/index.js index f1ff836e6..f888d6ae2 100644 --- a/src/extractors/custom/www.mentalfloss.com/index.js +++ b/src/extractors/custom/www.mentalfloss.com/index.js @@ -22,7 +22,7 @@ export const WwwMentalflossComExtractor = { ['meta[name="article:published_time"]', 'value'], '.date-display-single', ], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, lead_image_url: { diff --git a/src/extractors/custom/www.newyorker.com/index.js b/src/extractors/custom/www.newyorker.com/index.js index b7ed305f0..d4b60c50b 100644 --- a/src/extractors/custom/www.newyorker.com/index.js +++ b/src/extractors/custom/www.newyorker.com/index.js @@ -47,7 +47,7 @@ export const NewYorkerExtractor = { 'time.content-header__publish-date', ['meta[name="pubdate"]', 'value'], ], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, lead_image_url: { diff --git a/src/extractors/custom/www.rawstory.com/index.js b/src/extractors/custom/www.rawstory.com/index.js index ff00f05fd..81a4a533c 100644 --- a/src/extractors/custom/www.rawstory.com/index.js +++ b/src/extractors/custom/www.rawstory.com/index.js @@ -18,7 +18,7 @@ export const WwwRawstoryComExtractor = { '.blog-author a:last-of-type', ], - timezone: 'EST', + // timezone: 'EST', }, lead_image_url: { diff --git a/src/extractors/custom/www.rollingstone.com/index.js b/src/extractors/custom/www.rollingstone.com/index.js index 78c2f44f4..7a5733010 100644 --- a/src/extractors/custom/www.rollingstone.com/index.js +++ b/src/extractors/custom/www.rollingstone.com/index.js @@ -15,7 +15,7 @@ export const WwwRollingstoneComExtractor = { 'time.content-published-date', ], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/www.si.com/index.js b/src/extractors/custom/www.si.com/index.js index f1f2d85f5..1d6638619 100644 --- a/src/extractors/custom/www.si.com/index.js +++ b/src/extractors/custom/www.si.com/index.js @@ -12,7 +12,7 @@ export const WwwSiComExtractor = { date_published: { selectors: [['meta[name="published"]', 'value']], - timezone: 'America/New_York', + // timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/www.usmagazine.com/index.js b/src/extractors/custom/www.usmagazine.com/index.js index 5a6ae0171..5482fa7f5 100644 --- a/src/extractors/custom/www.usmagazine.com/index.js +++ b/src/extractors/custom/www.usmagazine.com/index.js @@ -10,7 +10,7 @@ export const WwwUsmagazineComExtractor = { }, date_published: { - timezone: 'America/New_York', + // timezone: 'America/New_York', selectors: [['meta[name="article:published_time"]', 'value']], }, From 3c336ef1559f386cd413b1509a52a04491308d4b Mon Sep 17 00:00:00 2001 From: touchRED Date: Wed, 29 Mar 2023 11:35:56 -0400 Subject: [PATCH 4/6] fix: add UTC timezone to extractors where needed --- src/extractors/custom/clinicaltrials.gov/index.js | 1 + src/extractors/custom/clinicaltrials.gov/index.test.js | 2 +- src/extractors/custom/genius.com/index.js | 1 + src/extractors/custom/genius.com/index.test.js | 2 +- src/extractors/custom/people.com/index.js | 1 + src/extractors/custom/people.com/index.test.js | 2 +- src/extractors/custom/pitchfork.com/index.js | 1 + src/extractors/custom/pitchfork.com/index.test.js | 2 +- 8 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/extractors/custom/clinicaltrials.gov/index.js b/src/extractors/custom/clinicaltrials.gov/index.js index c7a0f195c..725af7c21 100644 --- a/src/extractors/custom/clinicaltrials.gov/index.js +++ b/src/extractors/custom/clinicaltrials.gov/index.js @@ -12,6 +12,7 @@ export const ClinicaltrialsGovExtractor = { date_published: { // selectors: ['span.term[data-term="Last Update Posted"]'], selectors: ['div:has(> span.term[data-term="Last Update Posted"])'], + timezone: 'UTC', }, content: { diff --git a/src/extractors/custom/clinicaltrials.gov/index.test.js b/src/extractors/custom/clinicaltrials.gov/index.test.js index ea16bb4a5..1069d5a26 100644 --- a/src/extractors/custom/clinicaltrials.gov/index.test.js +++ b/src/extractors/custom/clinicaltrials.gov/index.test.js @@ -58,7 +58,7 @@ describe('ClinicaltrialsGovExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2018-11-21T05:00:00.000Z'); + assert.equal(date_published, '2018-11-21T00:00:00.000Z'); }); it('returns the content', async () => { diff --git a/src/extractors/custom/genius.com/index.js b/src/extractors/custom/genius.com/index.js index 86d7f4bb1..4d0c3f065 100644 --- a/src/extractors/custom/genius.com/index.js +++ b/src/extractors/custom/genius.com/index.js @@ -20,6 +20,7 @@ export const GeniusComExtractor = { }, ], ], + timezone: 'UTC', }, dek: { diff --git a/src/extractors/custom/genius.com/index.test.js b/src/extractors/custom/genius.com/index.test.js index ebefdd94d..84f6b5954 100644 --- a/src/extractors/custom/genius.com/index.test.js +++ b/src/extractors/custom/genius.com/index.test.js @@ -53,7 +53,7 @@ describe('GeniusComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '1984-06-25T04:00:00.000Z'); + assert.equal(date_published, '1984-06-25T00:00:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/people.com/index.js b/src/extractors/custom/people.com/index.js index 99b8dac53..344f13951 100644 --- a/src/extractors/custom/people.com/index.js +++ b/src/extractors/custom/people.com/index.js @@ -14,6 +14,7 @@ export const PeopleComExtractor = { '.mntl-attribution__item-date', ['meta[name="article:published_time"]', 'value'], ], + timezone: 'UTC', }, lead_image_url: { diff --git a/src/extractors/custom/people.com/index.test.js b/src/extractors/custom/people.com/index.test.js index 4de9dae7d..be1951b71 100644 --- a/src/extractors/custom/people.com/index.test.js +++ b/src/extractors/custom/people.com/index.test.js @@ -56,7 +56,7 @@ describe('PeopleComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2016-12-12T14:22:00.000Z'); + assert.equal(date_published, '2016-12-12T09:22:00.000Z'); }); it('returns the lead_image_url', async () => { diff --git a/src/extractors/custom/pitchfork.com/index.js b/src/extractors/custom/pitchfork.com/index.js index b199e7c19..c6b78f183 100644 --- a/src/extractors/custom/pitchfork.com/index.js +++ b/src/extractors/custom/pitchfork.com/index.js @@ -14,6 +14,7 @@ export const PitchforkComExtractor = { date_published: { selectors: ['div[class^="InfoSliceWrapper-"]', ['.pub-date', 'datetime']], + timezone: 'UTC', }, dek: { diff --git a/src/extractors/custom/pitchfork.com/index.test.js b/src/extractors/custom/pitchfork.com/index.test.js index 6e8c0cdb2..b2b8ebe43 100644 --- a/src/extractors/custom/pitchfork.com/index.test.js +++ b/src/extractors/custom/pitchfork.com/index.test.js @@ -41,7 +41,7 @@ describe('PitchforkComExtractor', () => { it('returns the date_published', async () => { const { date_published } = await result; - assert.equal(date_published, '2019-06-07T04:00:00.000Z'); + assert.equal(date_published, '2019-06-07T00:00:00.000Z'); }); it('returns the dek', async () => { From c49b5efc7a828a1349011636446798aec084ff18 Mon Sep 17 00:00:00 2001 From: touchRED Date: Wed, 29 Mar 2023 11:59:31 -0400 Subject: [PATCH 5/6] fix: few more extractor updates --- src/extractors/custom/gothamist.com/index.js | 2 -- src/extractors/custom/news.nationalgeographic.com/index.js | 1 - src/extractors/custom/www.al.com/index.js | 2 +- src/extractors/custom/www.al.com/index.test.js | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/extractors/custom/gothamist.com/index.js b/src/extractors/custom/gothamist.com/index.js index bcbccbffb..d149113e8 100644 --- a/src/extractors/custom/gothamist.com/index.js +++ b/src/extractors/custom/gothamist.com/index.js @@ -24,8 +24,6 @@ export const GothamistComExtractor = { 'abbr', 'abbr.published', ], - - timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/news.nationalgeographic.com/index.js b/src/extractors/custom/news.nationalgeographic.com/index.js index b0e02d4a4..55e82c920 100644 --- a/src/extractors/custom/news.nationalgeographic.com/index.js +++ b/src/extractors/custom/news.nationalgeographic.com/index.js @@ -11,7 +11,6 @@ export const NewsNationalgeographicComExtractor = { date_published: { selectors: [['meta[name="article:published_time"]', 'value']], - timezone: 'America/New_York', }, dek: { diff --git a/src/extractors/custom/www.al.com/index.js b/src/extractors/custom/www.al.com/index.js index 35a519d13..e3be94611 100644 --- a/src/extractors/custom/www.al.com/index.js +++ b/src/extractors/custom/www.al.com/index.js @@ -11,7 +11,7 @@ export const WwwAlComExtractor = { date_published: { selectors: [['meta[name="article_date_original"]', 'value']], - timezone: 'EST', + timezone: 'CST', }, lead_image_url: { diff --git a/src/extractors/custom/www.al.com/index.test.js b/src/extractors/custom/www.al.com/index.test.js index c9598dc12..6503be63a 100644 --- a/src/extractors/custom/www.al.com/index.test.js +++ b/src/extractors/custom/www.al.com/index.test.js @@ -57,7 +57,7 @@ describe('WwwAlComExtractor', () => { // Update these values with the expected values from // the article. - assert.equal(date_published, '2016-12-22T23:47:00.000Z'); + assert.equal(date_published, '2016-12-23T00:47:00.000Z'); }); it('returns the lead_image_url', async () => { From 9cf23f197bc1f721cc058d9f65e9a78140ff2a22 Mon Sep 17 00:00:00 2001 From: touchRED Date: Wed, 29 Mar 2023 13:30:00 -0400 Subject: [PATCH 6/6] fix: update al.com timezone --- src/extractors/custom/www.al.com/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extractors/custom/www.al.com/index.js b/src/extractors/custom/www.al.com/index.js index e3be94611..4d762e1af 100644 --- a/src/extractors/custom/www.al.com/index.js +++ b/src/extractors/custom/www.al.com/index.js @@ -11,7 +11,7 @@ export const WwwAlComExtractor = { date_published: { selectors: [['meta[name="article_date_original"]', 'value']], - timezone: 'CST', + timezone: 'America/Chicago', }, lead_image_url: {