Skip to content

Commit d39eae8

Browse files
committed
add axios-retry to retry client connection issues
update package versions, tests and docs
1 parent 3b113eb commit d39eae8

File tree

10 files changed

+1274
-4602
lines changed

10 files changed

+1274
-4602
lines changed

__tests__/client.test.ts

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@ import * as errors from '../src/errors.js';
55
import { ScrapeConfig } from '../src/scrapeconfig.js';
66
import { describe, it, expect, jest, beforeEach } from '@jest/globals';
77

8-
jest.mock('axios');
9-
10-
const mockedAxios = axios as jest.Mocked<typeof axios>;
118

129
function resultFactory(params: {
1310
url?: string;
@@ -39,7 +36,7 @@ describe('concurrent scrape', () => {
3936
// mock axios to return /account data and 2 types of results:
4037
// - success for /success endpoints
4138
// - ASP failure for /failure endpoints
42-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
39+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
4340
if (config.url.includes('/account')) {
4441
return {
4542
status: 200,
@@ -71,7 +68,7 @@ describe('concurrent scrape', () => {
7168
});
7269

7370
beforeEach(() => {
74-
mockedAxios.request.mockClear(); // clear all mock meta on each test
71+
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
7572
});
7673

7774
it('success', async () => {
@@ -91,7 +88,7 @@ describe('concurrent scrape', () => {
9188
expect(results.length).toBe(5);
9289
expect(errors.length).toBe(5);
9390
// 10 requests and 1 account info
94-
expect(mockedAxios.request).toHaveBeenCalledTimes(11);
91+
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(11);
9592
}, 5_000);
9693

9794
it('success with explicit concurrency', async () => {
@@ -111,7 +108,7 @@ describe('concurrent scrape', () => {
111108
expect(results.length).toBe(5);
112109
expect(errors.length).toBe(5);
113110
// 10 requests only - no account info request is expected when concurrency is set explicitly
114-
expect(mockedAxios.request).toHaveBeenCalledTimes(10);
111+
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(10);
115112
}, 2_000);
116113
});
117114

@@ -120,12 +117,12 @@ describe('scrape', () => {
120117
const client = new ScrapflyClient({ key: KEY });
121118

122119
beforeEach(() => {
123-
mockedAxios.request.mockClear(); // clear all mock meta on each test
120+
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
124121
});
125122

126123
it('GET success', async () => {
127124
const url = 'https://httpbin.dev/json';
128-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
125+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
129126
// Ensure the URL matches the pattern
130127
expect(config.url).toMatch(client.HOST + '/scrape');
131128
expect(config.method).toEqual('GET');
@@ -144,12 +141,12 @@ describe('scrape', () => {
144141
expect(result.context.asp).toBe(false);
145142
expect(result.uuid).toBe('1234');
146143
// a single request:
147-
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
144+
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
148145
});
149146

150147
it('GET complex urls', async () => {
151148
const url = 'https://httpbin.dev/anything/?website=https://httpbin.dev/anything';
152-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
149+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
153150
// Ensure the URL matches the pattern
154151
expect(config.url).toMatch(client.HOST + '/scrape');
155152
expect(config.method).toEqual('GET');
@@ -168,12 +165,12 @@ describe('scrape', () => {
168165
expect(result.context.asp).toBe(false);
169166
expect(result.uuid).toBe('1234');
170167
// a single request:
171-
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
168+
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
172169
});
173170

174171
it('POST success', async () => {
175172
const url = 'https://httpbin.dev/json';
176-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
173+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
177174
// Ensure the URL matches the pattern
178175
expect(config.url).toMatch(client.HOST + '/scrape');
179176
expect(config.method).toEqual('POST');
@@ -198,16 +195,17 @@ describe('scrape', () => {
198195
expect(result.config.url).toBe('https://httpbin.dev/json');
199196
expect(result.context.asp).toBe(false);
200197
expect(result.uuid).toBe('1234');
201-
expect(mockedAxios.request).toHaveBeenCalledTimes(1);
198+
expect(jest.spyOn(axios, 'request')).toHaveBeenCalledTimes(1);
202199
});
203200

204201
it('unhandled errors propagate up', async () => {
202+
jest.spyOn(axios, 'request').mockReset();
205203
const url = 'https://httpbin.dev/json';
206-
mockedAxios.request.mockImplementation(() => Promise.reject(new Error('Network Error')));
204+
jest.spyOn(axios, 'request').mockImplementation(() => Promise.reject(new Error('Foo Error')));
207205

208206
await expect(async () => {
209207
await client.scrape(new ScrapeConfig({ url }));
210-
}).rejects.toThrow('Network Error');
208+
}).rejects.toThrow('Foo Error');
211209
});
212210
// it('handles ')
213211
});
@@ -230,12 +228,12 @@ describe('client errors', () => {
230228
const client = new ScrapflyClient({ key: KEY });
231229

232230
beforeEach(() => {
233-
mockedAxios.request.mockClear(); // clear all mock meta on each test
231+
jest.spyOn(axios, 'request').mockClear(); // clear all mock meta on each test
234232
});
235233

236234
it('raises ApiHttpServerError on 500 and success', async () => {
237235
const url = 'https://httpbin.dev/json';
238-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
236+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
239237
return resultFactory({
240238
url: config.url,
241239
status_code: 500,
@@ -249,7 +247,7 @@ describe('client errors', () => {
249247

250248
it('raises BadApiKeyError on 401', async () => {
251249
const url = 'https://httpbin.dev/json';
252-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
250+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
253251
return resultFactory({
254252
url: config.url,
255253
status_code: 401,
@@ -262,7 +260,7 @@ describe('client errors', () => {
262260
});
263261
it('raises TooManyRequests on 429 and success', async () => {
264262
const url = 'https://httpbin.dev/json';
265-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
263+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
266264
return resultFactory({
267265
url: config.url,
268266
status_code: 429,
@@ -273,7 +271,7 @@ describe('client errors', () => {
273271
await expect(client.scrape(new ScrapeConfig({ url }))).rejects.toThrow(errors.TooManyRequests);
274272
});
275273
it('raises ScrapflyScrapeError on ::SCRAPE:: resource and success', async () => {
276-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
274+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
277275
return resultFactory({
278276
url: config.url,
279277
status: 'ERR::SCRAPE::BAD_PROTOCOL',
@@ -286,7 +284,7 @@ describe('client errors', () => {
286284
});
287285

288286
it('raises ScrapflyWebhookError on ::WEBHOOK:: resource and success', async () => {
289-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
287+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
290288
return resultFactory({
291289
url: config.url,
292290
status: 'ERR::WEBHOOK::DISABLED ',
@@ -298,7 +296,7 @@ describe('client errors', () => {
298296
);
299297
});
300298
it('raises ScrapflyProxyError on ERR::PROXY::POOL_NOT_FOUND resource and success', async () => {
301-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
299+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
302300
return resultFactory({
303301
url: config.url,
304302
status: 'ERR::PROXY::POOL_NOT_FOUND ',
@@ -311,7 +309,7 @@ describe('client errors', () => {
311309
});
312310

313311
it('raises ScrapflyScheduleError on ERR::SCHEDULE::DISABLED resource and success', async () => {
314-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
312+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
315313
return resultFactory({
316314
url: config.url,
317315
status: 'ERR::SCHEDULE::DISABLED',
@@ -324,7 +322,7 @@ describe('client errors', () => {
324322
});
325323

326324
it('raises ScrapflyAspError on ERR::ASP::SHIELD_ERROR resource and success', async () => {
327-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
325+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
328326
return resultFactory({
329327
url: config.url,
330328
status: 'ERR::ASP::SHIELD_ERROR',
@@ -337,7 +335,7 @@ describe('client errors', () => {
337335
});
338336

339337
it('raises ScrapflySessionError on ERR::SESSION::CONCURRENT_ACCESS resource and success', async () => {
340-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
338+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
341339
return resultFactory({
342340
url: config.url,
343341
status: 'ERR::SESSION::CONCURRENT_ACCESS',
@@ -350,7 +348,7 @@ describe('client errors', () => {
350348
});
351349

352350
it('raises ApiHttpClientError on success and unknown status', async () => {
353-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
351+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
354352
return resultFactory({
355353
url: config.url,
356354
status: 'ERR::NEW',
@@ -362,7 +360,7 @@ describe('client errors', () => {
362360
);
363361
});
364362
it('raises UpstreamHttpServerError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and >=500', async () => {
365-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
363+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
366364
return resultFactory({
367365
url: config.url,
368366
success: false,
@@ -375,7 +373,7 @@ describe('client errors', () => {
375373
);
376374
});
377375
it('raises UpstreamHttpClientError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and 4xx status', async () => {
378-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
376+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
379377
return resultFactory({
380378
url: config.url,
381379
success: false,
@@ -398,7 +396,7 @@ describe('client errors', () => {
398396
SESSION: errors.ScrapflySessionError,
399397
};
400398
for (const [resource, err] of Object.entries(resourceErrMap)) {
401-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
399+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
402400
return resultFactory({
403401
url: config.url,
404402
success: false,
@@ -410,7 +408,7 @@ describe('client errors', () => {
410408
});
411409

412410
it('raises ScrapflyError on unhandled failure', async () => {
413-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
411+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
414412
return resultFactory({
415413
url: config.url,
416414
success: false,
@@ -423,7 +421,7 @@ describe('client errors', () => {
423421
);
424422
});
425423
it('raises on unhandled failure', async () => {
426-
mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
424+
jest.spyOn(axios, 'request').mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
427425
return resultFactory({
428426
url: config.url,
429427
success: false,
@@ -436,19 +434,19 @@ describe('client errors', () => {
436434
);
437435
});
438436
it('account retrieval status unhandled code (e.g. 404)', async () => {
439-
mockedAxios.request.mockRejectedValue({
437+
jest.spyOn(axios, 'request').mockRejectedValue({
440438
response: { status: 404, data: {} },
441439
});
442440
await expect(client.account()).rejects.toThrow(errors.HttpError);
443441
});
444442
it('account retrieval bad api key (status 401)', async () => {
445-
mockedAxios.request.mockRejectedValue({
443+
jest.spyOn(axios, 'request').mockRejectedValue({
446444
response: { status: 401, data: {} },
447445
});
448446
await expect(client.account()).rejects.toThrow(errors.BadApiKeyError);
449447
});
450448
it('scrape bad api key (status 401)', async () => {
451-
mockedAxios.request.mockRejectedValue({
449+
jest.spyOn(axios, 'request').mockRejectedValue({
452450
response: { status: 401, data: {} },
453451
});
454452
await expect(client.scrape(new ScrapeConfig({ url: 'https://httpbin.dev/json' }))).rejects.toThrow(

__tests__/result.test.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
1+
import * as cheerio from 'cheerio';
12
import * as fs from 'fs';
23
import { ScrapeResult } from '../src/result.js';
34
import * as errors from '../src/errors.js';
4-
import { describe, it, expect } from '@jest/globals';
5+
import { describe, it, expect, jest } from '@jest/globals';
56

67
describe('cheerio selector', () => {
78
it('lazy loads and caches itself', () => {
89
const response = JSON.parse(fs.readFileSync('__tests__/data/response_html_success.json', 'utf8'));
910
const result = new ScrapeResult(response);
11+
const spy = jest.spyOn(cheerio, 'load');
1012
expect(result.selector('h1').text()).toEqual('Herman Melville - Moby-Dick');
1113
// make sure calling it twice performs the same
1214
expect(result.selector('h1').text()).toEqual('Herman Melville - Moby-Dick');
15+
// cheerio.load is called exactly once - means it's cached
16+
expect(spy).toHaveBeenCalledTimes(1);
17+
spy.mockRestore();
1318
});
1419
it('throws ContentTypeError when accessing .selector on JSON data', () => {
1520
const response = JSON.parse(fs.readFileSync('__tests__/data/response_json_success.json', 'utf8'));

examples/README.md

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
11
# Scrapfly Typescript SDK Examples
22

3-
This directory contains commonly used examples for the Scrapfly Typescript SDK.
3+
This directory contains commonly used examples for the Scrapfly TypeScript SDK, which is available in TypeScript runtimes (Bun, Deno) as well as JavaScript ones like Node.js.
4+
5+
You can use `node` to run the `.js` examples:
6+
7+
```
8+
node examples/basic-get.js
9+
```
10+
11+
Or compile `.ts` examples to `.js`:
12+
13+
```
14+
tsc examples/basic-get.ts
15+
node examples/basic-get.js
16+
```
17+
18+
Or run the `.ts` examples directly through TypeScript runtimes like `bun`:
19+
20+
```
21+
bun examples/basic-get.ts
22+
```

examples/basic-get.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/*
2+
Most basic scrapfly request - GET a provided url
3+
*/
4+
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
5+
6+
const key = 'YOUR SCRAPFLY KEY';
7+
const client = new ScrapflyClient({ key });
8+
const result = await client.scrape(
9+
new ScrapeConfig({
10+
url: 'https://httpbin.dev/html',
11+
}),
12+
);
13+
console.log(result.result.content); // html content

examples/concurrent-scrape.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ note:
88
the client will automatically set the limit to your maximum
99
if you set the limit too high, expect errors.TooManyConcurrentRequests
1010
*/
11-
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
11+
import { ScrapflyClient, ScrapeConfig, log } from 'scrapfly-sdk';
12+
13+
log.setLevel('DEBUG');
1214

1315
const key = 'YOUR SCRAPFLY KEY';
1416
const client = new ScrapflyClient({ key });

examples/get-binary.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/*
2+
* This example shows how to download binary data from scrapfly responses.
3+
*/
4+
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
5+
import fs from 'fs';
6+
const key = 'YOUR SCRAPFLY KEY';
7+
const client = new ScrapflyClient({ key });
8+
const result = await client.scrape(
9+
new ScrapeConfig({
10+
url: 'https://web-scraping.dev/product/1',
11+
render_js: true,
12+
js: 'return document.title',
13+
}),
14+
);
15+
// then decode the base64-encoded content into a buffer:
16+
const data = Buffer.from(result.result.content, 'base64');
17+
fs.writeFileSync('image.png', data);

0 commit comments

Comments
 (0)