From df36c014d640648e768d9904770ed3718aae9153 Mon Sep 17 00:00:00 2001
From: zfcsoftware
Date: Mon, 11 Nov 2024 21:46:30 +0300
Subject: [PATCH 1/4] Removed Axios and added native fetch support
---
package-lock.json | 291 ++++++++++++++++++++++++++++++----------------
package.json | 4 +-
src/lib.ts | 92 +++++++++------
src/types.ts | 27 +++++
4 files changed, 274 insertions(+), 140 deletions(-)
diff --git a/package-lock.json b/package-lock.json
index d0bdbfd..e412cf7 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,8 +9,8 @@
"version": "1.0.0",
"license": "MIT",
"dependencies": {
- "axios": "^1.6.8",
- "axios-retry": "^4.4.0"
+ "@types/qs": "^6.9.17",
+ "qs": "^6.13.0"
},
"devDependencies": {
"@jest/globals": "^29.7.0",
@@ -1088,6 +1088,12 @@
"undici-types": "~6.19.2"
}
},
+ "node_modules/@types/qs": {
+ "version": "6.9.17",
+ "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.9.17.tgz",
+ "integrity": "sha512-rX4/bPcfmvxHDv0XjfJELTTr+iB+tn032nPILqHm5wbthUUUuVtNGGqzhya9XUxjTP8Fpr0qYgSZZKxGY++svQ==",
+ "license": "MIT"
+ },
"node_modules/@types/stack-utils": {
"version": "2.0.3",
"dev": true,
@@ -1194,29 +1200,6 @@
"dev": true,
"license": "MIT"
},
- "node_modules/asynckit": {
- "version": "0.4.0",
- "license": "MIT"
- },
- "node_modules/axios": {
- "version": "1.7.5",
- "license": "MIT",
- "dependencies": {
- "follow-redirects": "^1.15.6",
- "form-data": "^4.0.0",
- "proxy-from-env": "^1.1.0"
- }
- },
- "node_modules/axios-retry": {
- "version": "4.5.0",
- "license": "Apache-2.0",
- "dependencies": {
- "is-retry-allowed": "^2.2.0"
- },
- "peerDependencies": {
- "axios": "0.x || 1.x"
- }
- },
"node_modules/babel-jest": {
"version": "29.7.0",
"dev": true,
@@ -1401,6 +1384,25 @@
"dev": true,
"license": "MIT"
},
+ "node_modules/call-bind": {
+ "version": "1.0.7",
+ "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz",
+ "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==",
+ "license": "MIT",
+ "dependencies": {
+ "es-define-property": "^1.0.0",
+ "es-errors": "^1.3.0",
+ "function-bind": "^1.1.2",
+ "get-intrinsic": "^1.2.4",
+ "set-function-length": "^1.2.1"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/callsites": {
"version": "3.1.0",
"dev": true,
@@ -1521,16 +1523,6 @@
"dev": true,
"license": "MIT"
},
- "node_modules/combined-stream": {
- "version": "1.0.8",
- "license": "MIT",
- "dependencies": {
- "delayed-stream": "~1.0.0"
- },
- "engines": {
- "node": ">= 0.8"
- }
- },
"node_modules/concat-map": {
"version": "0.0.1",
"dev": true,
@@ -1616,11 +1608,21 @@
"node": ">=0.10.0"
}
},
- "node_modules/delayed-stream": {
- "version": "1.0.0",
+ "node_modules/define-data-property": {
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
+ "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
"license": "MIT",
+ "dependencies": {
+ "es-define-property": "^1.0.0",
+ "es-errors": "^1.3.0",
+ "gopd": "^1.0.1"
+ },
"engines": {
- "node": ">=0.4.0"
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/detect-newline": {
@@ -1690,6 +1692,27 @@
"is-arrayish": "^0.2.1"
}
},
+ "node_modules/es-define-property": {
+ "version": "1.0.0",
+ "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz",
+ "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==",
+ "license": "MIT",
+ "dependencies": {
+ "get-intrinsic": "^1.2.4"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
+ "node_modules/es-errors": {
+ "version": "1.3.0",
+ "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+ "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/escalade": {
"version": "3.1.2",
"dev": true,
@@ -1825,36 +1848,6 @@
"node": ">=8"
}
},
- "node_modules/follow-redirects": {
- "version": "1.15.6",
- "funding": [
- {
- "type": "individual",
- "url": "https://github.com/sponsors/RubenVerborgh"
- }
- ],
- "license": "MIT",
- "engines": {
- "node": ">=4.0"
- },
- "peerDependenciesMeta": {
- "debug": {
- "optional": true
- }
- }
- },
- "node_modules/form-data": {
- "version": "4.0.0",
- "license": "MIT",
- "dependencies": {
- "asynckit": "^0.4.0",
- "combined-stream": "^1.0.8",
- "mime-types": "^2.1.12"
- },
- "engines": {
- "node": ">= 6"
- }
- },
"node_modules/fs.realpath": {
"version": "1.0.0",
"dev": true,
@@ -1874,7 +1867,6 @@
},
"node_modules/function-bind": {
"version": "1.1.2",
- "dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -1896,6 +1888,25 @@
"node": "6.* || 8.* || >= 10.*"
}
},
+ "node_modules/get-intrinsic": {
+ "version": "1.2.4",
+ "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
+ "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==",
+ "license": "MIT",
+ "dependencies": {
+ "es-errors": "^1.3.0",
+ "function-bind": "^1.1.2",
+ "has-proto": "^1.0.1",
+ "has-symbols": "^1.0.3",
+ "hasown": "^2.0.0"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/get-package-type": {
"version": "0.1.0",
"dev": true,
@@ -1942,6 +1953,18 @@
"node": ">=4"
}
},
+ "node_modules/gopd": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
+ "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
+ "license": "MIT",
+ "dependencies": {
+ "get-intrinsic": "^1.1.3"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/graceful-fs": {
"version": "4.2.11",
"dev": true,
@@ -1955,9 +1978,44 @@
"node": ">=8"
}
},
+ "node_modules/has-property-descriptors": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
+ "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
+ "license": "MIT",
+ "dependencies": {
+ "es-define-property": "^1.0.0"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
+ "node_modules/has-proto": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz",
+ "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
+ "node_modules/has-symbols": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
+ "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/hasown": {
"version": "2.0.2",
- "dev": true,
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
@@ -2062,16 +2120,6 @@
"node": ">=0.12.0"
}
},
- "node_modules/is-retry-allowed": {
- "version": "2.2.0",
- "license": "MIT",
- "engines": {
- "node": ">=10"
- },
- "funding": {
- "url": "https://github.com/sponsors/sindresorhus"
- }
- },
"node_modules/is-stream": {
"version": "2.0.1",
"dev": true,
@@ -2859,23 +2907,6 @@
"node": ">=8.6"
}
},
- "node_modules/mime-db": {
- "version": "1.52.0",
- "license": "MIT",
- "engines": {
- "node": ">= 0.6"
- }
- },
- "node_modules/mime-types": {
- "version": "2.1.35",
- "license": "MIT",
- "dependencies": {
- "mime-db": "1.52.0"
- },
- "engines": {
- "node": ">= 0.6"
- }
- },
"node_modules/mimic-fn": {
"version": "2.1.0",
"dev": true,
@@ -2934,6 +2965,18 @@
"node": ">=8"
}
},
+ "node_modules/object-inspect": {
+ "version": "1.13.3",
+ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.3.tgz",
+ "integrity": "sha512-kDCGIbxkDSXE3euJZZXzc6to7fCrKHNI/hSRQnRuQ+BWjFNzZwiFF8fj/6o2t2G9/jTj8PSIYTfCLelLZEeRpA==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/once": {
"version": "1.4.0",
"dev": true,
@@ -3120,10 +3163,6 @@
"node": ">= 6"
}
},
- "node_modules/proxy-from-env": {
- "version": "1.1.0",
- "license": "MIT"
- },
"node_modules/pure-rand": {
"version": "6.1.0",
"dev": true,
@@ -3139,6 +3178,21 @@
],
"license": "MIT"
},
+ "node_modules/qs": {
+ "version": "6.13.0",
+ "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
+ "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
+ "license": "BSD-3-Clause",
+ "dependencies": {
+ "side-channel": "^1.0.6"
+ },
+ "engines": {
+ "node": ">=0.6"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/react-is": {
"version": "18.3.1",
"dev": true,
@@ -3203,6 +3257,23 @@
"semver": "bin/semver.js"
}
},
+ "node_modules/set-function-length": {
+ "version": "1.2.2",
+ "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
+ "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==",
+ "license": "MIT",
+ "dependencies": {
+ "define-data-property": "^1.1.4",
+ "es-errors": "^1.3.0",
+ "function-bind": "^1.1.2",
+ "get-intrinsic": "^1.2.4",
+ "gopd": "^1.0.1",
+ "has-property-descriptors": "^1.0.2"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ }
+ },
"node_modules/shebang-command": {
"version": "2.0.0",
"dev": true,
@@ -3222,6 +3293,24 @@
"node": ">=8"
}
},
+ "node_modules/side-channel": {
+ "version": "1.0.6",
+ "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz",
+ "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==",
+ "license": "MIT",
+ "dependencies": {
+ "call-bind": "^1.0.7",
+ "es-errors": "^1.3.0",
+ "get-intrinsic": "^1.2.4",
+ "object-inspect": "^1.13.1"
+ },
+ "engines": {
+ "node": ">= 0.4"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/ljharb"
+ }
+ },
"node_modules/signal-exit": {
"version": "3.0.7",
"dev": true,
diff --git a/package.json b/package.json
index e2c133b..98c844e 100644
--- a/package.json
+++ b/package.json
@@ -38,7 +38,7 @@
"typescript": "^5.4.5"
},
"dependencies": {
- "axios": "^1.6.8",
- "axios-retry": "^4.4.0"
+ "@types/qs": "^6.9.17",
+ "qs": "^6.13.0"
}
}
diff --git a/src/lib.ts b/src/lib.ts
index 2def781..901c141 100644
--- a/src/lib.ts
+++ b/src/lib.ts
@@ -1,7 +1,7 @@
-import axios, { AxiosError, AxiosInstance, AxiosResponse } from "axios";
import { PlayWithBrowser } from "./playwithbrowser";
import { GeoCode } from "./geocode";
-import { DoRequest, DoResponse, StatisticsResponse } from "./types";
+import { DoRequest, DoResponse, StatisticsResponse, FetchConfig, MakeRequestResponse } from "./types";
+import qs from "qs";
export const API_URL = "https://api.scrape.do";
@@ -25,19 +25,47 @@ export const ValidStatusCodeRanges: { min: number; max: number }[] = [
* console.log(response);
*/
export class ScrapeDo {
- private reqClient: AxiosInstance;
/**
* Initializes a new instance of ScrapeDo.
* @param token - The API token used for authenticating requests.
*/
constructor(public token: string) {
- this.reqClient = axios.create({
- baseURL: API_URL,
- params: {
- token: token,
- },
- });
+ this.token = token;
+ }
+
+ /**
+ * Make a scrape request to the API
+ * @param config - Configuration for the request
+ * @returns Response of the scraping result or error
+ */
+  async makeRequest(config: FetchConfig): Promise<MakeRequestResponse> {
+    let headers: Record<string, string> = config.headers || {};
+
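+    // Serialize object bodies as JSON and set the matching content type.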
+ if (typeof config?.data === 'object') {
+ config.data = JSON.stringify(config.data);
+ headers['content-type'] = 'application/json';
+ }
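+    // Build the full request URL; qs with indices:false serializes array params without index suffixes.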
+ let reqUrl = `${API_URL}${config.path}?${qs.stringify(config.params, { indices: false })}`
+
+ let response: MakeRequestResponse = await fetch(reqUrl, {
+ headers,
+ method: config.method || 'GET',
+ body: config.data
+ })
+
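+    // Read the body as text, then opportunistically parse it as JSON; non-JSON bodies stay plain strings.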
+    let data: any = await response.text();
+ try { data = JSON.parse(data) } catch { }
+
+ response.data = data
+
+ if (ValidStatusCodes.includes(response.status) || ValidStatusCodeRanges.some((range) => response.status >= range.min && response.status <= range.max)) {
+ return response
+ } else {
+ throw response
+ }
+
}
/**
@@ -104,28 +132,17 @@ export class ScrapeDo {
customHeaders: options.customHeaders ? true : undefined,
setCookies: cookies,
playWithBrowser: pwbParsed,
+ token: this.token
};
- return this.reqClient
- .request({
- method: method,
- url: "/",
- headers: headers,
- data: body,
- params: params,
- validateStatus: (status) => {
- if (options.transparentResponse) {
- return true;
- } else {
- if (ValidStatusCodes.includes(status) || ValidStatusCodeRanges.some((range) => status >= range.min && status <= range.max)) {
- return true;
- } else {
- return false;
- }
- }
- },
- })
- .then((response: AxiosResponse) => {
+ return this.makeRequest({
+ method: method,
+ path: "/",
+ headers: headers,
+ data: body,
+ params,
+ })
+ .then((response: MakeRequestResponse) => {
        const sdoHeaders: Partial<DoHeaders> = {
cookies: response.headers["Scrape.do-Cookies"],
remainingCredits: response.headers["Scrape.do-Remaining-Credits"],
@@ -160,14 +177,14 @@ export class ScrapeDo {
};
}
})
- .catch((error: AxiosError) => {
- if (error.response?.data && error.response.data["Message"]) {
+ .catch((error: MakeRequestResponse) => {
+ if (error.data && error.data["Message"]) {
return {
- url: error.response.data["URL"],
- statusCode: error.response.data["StatusCode"],
- message: error.response.data["Message"],
- possibleCauses: error.response.data["PossibleCauses"],
- contact: error.response.data["Contact"],
+ url: error.data["URL"],
+ statusCode: error.data["StatusCode"],
+ message: error.data["Message"],
+ possibleCauses: error.data["PossibleCauses"],
+ contact: error.data["Contact"],
};
} else {
throw error;
@@ -182,6 +199,7 @@ export class ScrapeDo {
* @see https://scrape.do/documentation/#usage-statistics-api
*/
async statistics() {
- return this.reqClient.get("/info").then((response) => response.data);
+ return this.makeRequest({ path: "/info", params: { token: this.token } })
+ .then((response): StatisticsResponse => response.data);
}
}
diff --git a/src/types.ts b/src/types.ts
index b1ce232..5b716cd 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -173,3 +173,30 @@ export interface StatisticsResponse {
RemainingConcurrentRequest: number;
RemainingMonthlyRequest: number;
}
+
+
+/**
+ * Configuration for a fetch request.
+ * @property {string} method - HTTP method for the request.
+ * @property {any} params - Query parameters for the request.
+ * @property {any} data - Data to be sent with the request.
+ * @property {Record<string, string>} headers - Headers for the request.
+ * @property {string} path - Path for the request.
+ */
+export interface FetchConfig {
+  method?: string;
+  params?: any;
+  data?: any;
+  headers?: Record<string, string>;
+  path: string;
+}
+
+
+/**
+ * Response structure for a fetch request.
+ * Extends the native fetch Response object.
+ * @property {any} data - Data returned from the request.
+ */
+export interface MakeRequestResponse extends Response {
+  data?: any;
+}
\ No newline at end of file
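For reference, the fetch-based flow introduced by this patch can be exercised end to end roughly as below — a minimal sketch, not part of the patch itself, assuming a valid API token in the `TOKEN` environment variable:

```typescript
import { ScrapeDo } from "@scrape-do/client";

async function main() {
  const client = new ScrapeDo(process.env.TOKEN || "");

  // sendRequest builds `${API_URL}/?token=...&url=...` via qs.stringify and
  // dispatches it through the new fetch-based makeRequest helper.
  const response = await client.sendRequest("GET", {
    url: "https://httpbin.co/anything",
  });
  console.log(response.statusCode);

  // statistics() goes through the same helper against the /info path.
  const stats = await client.statistics();
  console.log(stats.RemainingMonthlyRequest);
}

main().catch(console.error);
```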
From a211a568b26f7011aadf8709da299fcac9e4b8d4 Mon Sep 17 00:00:00 2001
From: zfcsoftware
Date: Mon, 11 Nov 2024 22:18:12 +0300
Subject: [PATCH 2/4] Made some design changes to the README file
---
README.md | 68 +++++++++++++++++++++++++++++++++++++------------------
1 file changed, 46 insertions(+), 22 deletions(-)
diff --git a/README.md b/README.md
index 97a6315..8c63d83 100644
--- a/README.md
+++ b/README.md
@@ -1,24 +1,28 @@
-# @scrape-do/client
-
-#### Scrape.do's official http client for node.js
+<div align="center">
+  <h1>Scrape Do Node Client</h1>
+  <p>Scrape.do's official http client for node.js</p>
+</div>
+
## How to install?
```bash
-> npm install @scrape-do/client
-# or get it from github
-> npm install git://git@github.com/scrape-do/node-client
+npm i @scrape-do/client
```
+or install from GitHub:
-## How to build from scratch
+```bash
+npm install git://git@github.com/scrape-do/node-client
+```
-#### If you want to contribute to the library or include your own customisations, you can recompile the library in this way.
+## How to build from scratch
+If you want to contribute to the library or include your own customisations, you can rebuild the library as follows.
```bash
-> git clone https://github.com/scrape-do/node-client
-> npm i
+git clone https://github.com/scrape-do/node-client
+npm i
# build with
-> npm build
+npm run build
```
## Example Usages
@@ -28,7 +32,9 @@
The super parameter enables the use of a residential proxy for the request. When this parameter is set to true, the request will be routed through a residential IP address. This means that the IP address will typically appear as if it belongs to a mobile network provider, adding an additional layer of anonymity and making the request look more like regular web traffic.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
super: true,
@@ -42,7 +48,9 @@ console.log(response);
The geoCode parameter allows you to specify the geographic location from which the request should appear to originate. By setting a specific country code, such as "us" for the United States, the request will be routed through an IP address from that region. This is especially useful for scraping websites that serve region-specific content or pricing, allowing you to access data as if you were browsing from that location.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
geoCode: "us",
@@ -56,7 +64,9 @@ console.log(response);
The regionalGeoCode parameter allows you to target requests from a broader geographic region, rather than a specific country. By specifying a regional code such as "europe" or "asia", your request will be routed through an IP address from that particular region. This is useful for scraping content that may be region-restricted, or for accessing region-specific data without the need to specify individual country codes.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
regionalGeoCode: "europe",
@@ -79,7 +89,9 @@ Key points to note:
- Sessions only for successful requests: A session will only be created if the initial request is successful.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
sessionId: "1234",
@@ -93,7 +105,9 @@ console.log(response);
The customHeaders option gives you full control over all headers sent to the target website. When you use customHeaders, the headers you provide will completely replace the default ones. This feature is useful when you need to define specific headers like User-Agent, Accept, Cookies, and more, ensuring that only your specified headers are sent with the request.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
customHeaders: {
@@ -111,7 +125,9 @@ extraHeaders is used when you want to add one or more headers specifically requi
The following example echoes your request back from httpbin.co; you should see the 'Key' header in the headers section of the response.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
extraHeaders: {
@@ -127,7 +143,9 @@ console.log(response);
The forwardHeaders option is ideal when you want to forward your custom headers directly to the target website without any additional headers being generated or modified by the service. This approach makes the request appear as if it is being made directly from your end, preserving the original header structure.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
forwardHeaders: {
@@ -143,7 +161,9 @@ console.log(response);
The render parameter allows for the execution of JavaScript during the request, enabling full browser-like rendering. When this parameter is set to true, the service will render the target webpage as if it were being loaded in a real browser, executing all JavaScript, loading dynamic content, and handling client-side interactions. This approach is particularly useful for scraping websites that rely heavily on JavaScript to display their content, providing a more accurate and “humanized” view of the page.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://httpbin.co/anything",
render: true,
@@ -168,7 +188,9 @@ Key information retrieved:
> For security reasons, you can send up to 10 requests per minute to this endpoint. If you exceed this rate, you will receive a 429 Too Many Requests error.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const stats = await client.statistics();
console.log(stats);
@@ -184,7 +206,9 @@ In this example, multiple parameters are combined to showcase advanced scraping
- [playWithBrowser](https://scrape.do/documentation/#play-with-browser?utm_source=github&utm_medium=node-client): Provides the ability to interact with the browser while rendering the page. For example, you can wait for specific elements to load or perform actions like clicking buttons. In this case, it waits for the element to ensure the page is fully loaded before proceeding.
```typescript
-const client = new ScrapeDo("example_token");
+const { ScrapeDo } = require("@scrape-do/client");
+
+const client = new ScrapeDo("your_api_token");
const response = await client.sendRequest("GET", {
url: "https://example.com",
render: true,
@@ -215,4 +239,4 @@ console.log(response);
## Disclaimer
-#### Any damages arising from the use of the library or service or any other legal situation cannot be associated with the scrape.do legal entity and team. The responsibility lies entirely with the user.
+#### Any damages arising from the use of the library or service or any other legal situation cannot be associated with the scrape.do legal entity and team. The responsibility lies entirely with the user.
\ No newline at end of file
From 1da76a6486ac76f06f200f2049235d9e18ecec5a Mon Sep 17 00:00:00 2001
From: zfcsoftware
Date: Wed, 13 Nov 2024 04:31:15 +0300
Subject: [PATCH 3/4] - Added the ability to return the response header
 information from requests sent to scrape.do. Useful when values such as
 server-timing or scrape.do-rid, which are not otherwise provided by the
 library, are needed. - Added 2 new tests: one checks that the sdo values in
 the header are returned correctly, the other that HTML responses are handled
 without error. - The playWithBrowser test has been made more detailed.
---
.env | 2 ++
package-lock.json | 13 +++++++++++
package.json | 1 +
src/lib.ts | 19 ++++++++++------
src/types.ts | 4 ++++
tests/lib.test.ts | 56 ++++++++++++++++++++++++++++++++++++++++++-----
6 files changed, 83 insertions(+), 12 deletions(-)
create mode 100644 .env
diff --git a/.env b/.env
new file mode 100644
index 0000000..43b2bd1
--- /dev/null
+++ b/.env
@@ -0,0 +1,2 @@
+# Only the TOKEN variable is required for the tests.
+TOKEN=
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index e412cf7..7d738af 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -10,6 +10,7 @@
"license": "MIT",
"dependencies": {
"@types/qs": "^6.9.17",
+ "dotenv": "^16.4.5",
"qs": "^6.13.0"
},
"devDependencies": {
@@ -1649,6 +1650,18 @@
"node": "^14.15.0 || ^16.10.0 || >=18.0.0"
}
},
+ "node_modules/dotenv": {
+ "version": "16.4.5",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz",
+ "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==",
+ "license": "BSD-2-Clause",
+ "engines": {
+ "node": ">=12"
+ },
+ "funding": {
+ "url": "https://dotenvx.com"
+ }
+ },
"node_modules/ejs": {
"version": "3.1.10",
"dev": true,
diff --git a/package.json b/package.json
index 98c844e..c071ca5 100644
--- a/package.json
+++ b/package.json
@@ -39,6 +39,7 @@
},
"dependencies": {
"@types/qs": "^6.9.17",
+ "dotenv": "^16.4.5",
"qs": "^6.13.0"
}
}
diff --git a/src/lib.ts b/src/lib.ts
index 901c141..0140095 100644
--- a/src/lib.ts
+++ b/src/lib.ts
@@ -59,6 +59,9 @@ export class ScrapeDo {
try { data = JSON.parse(data) } catch { }
response.data = data
+
+  // The response headers may contain values worth exposing (e.g. server-timing, scrape.do-rid). This is best-effort, so any error here is deliberately ignored.
+ try { response.sdoResponseHeaders = Object.fromEntries(response.headers); } catch { }
if (ValidStatusCodes.includes(response.status) || ValidStatusCodeRanges.some((range) => response.status >= range.min && response.status <= range.max)) {
return response
@@ -143,14 +146,16 @@ export class ScrapeDo {
params,
})
.then((response: MakeRequestResponse) => {
+      // sdoResponseHeaders may be empty if Object.fromEntries failed above, so headers.get() is used to read the individual values.
        const sdoHeaders: Partial<DoHeaders> = {
- cookies: response.headers["Scrape.do-Cookies"],
- remainingCredits: response.headers["Scrape.do-Remaining-Credits"],
- requestCost: response.headers["Scrape.do-Request-Cost"],
- resolvedURL: response.headers["Scrape.do-Resolved-Url"],
- targetURL: response.headers["Scrape.do-Target-Url"],
- initialStatusCode: response.headers["Scrape.do-Initial-Status-Code"],
- targetRedirectedLocation: response.headers["Scrape.do-Target-Redirected-Location"],
+ cookies: response.headers.get("scrape.do-cookies")?.toString(),
+ remainingCredits: response.headers.get("scrape.do-remaining-credits")?.toString(),
+ requestCost: response.headers.get("scrape.do-request-cost")?.toString(),
+ resolvedURL: response.headers.get("scrape.do-resolved-url")?.toString(),
+ targetURL: response.headers.get("scrape.do-target-url")?.toString(),
+ initialStatusCode: response.headers.get("scrape.do-initial-status-code")?.toString(),
+ targetRedirectedLocation: response.headers.get("scrape.do-target-redirected-location")?.toString(),
+ sdoResponseHeaders: response.sdoResponseHeaders
};
if (options.returnJSON) {
diff --git a/src/types.ts b/src/types.ts
index 5b716cd..7f08cbb 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -115,6 +115,7 @@ export type DoResponse = {
* @property {string} [targetURL] - The target URL that was scraped
* @property {string} [initialStatusCode] - The status code of the initial request before any redirects
* @property {string} [targetRedirectedLocation] - Final location after following redirects
+ * @property {Record<string, string>} [sdoResponseHeaders] - Additional headers returned in the response
*/
export interface DoHeaders {
cookies?: string;
@@ -124,6 +125,7 @@ export interface DoHeaders {
targetURL?: string;
initialStatusCode?: string;
targetRedirectedLocation?: string;
+  sdoResponseHeaders?: Record<string, string>;
}
/**
@@ -195,8 +197,10 @@ export interface FetchConfig {
/**
* Response structure for a fetch request.
 * Extends the native fetch Response object.
 * @property {any} data - Data returned from the request.
+ * @property {Record<string, string>} sdoResponseHeaders - Headers returned by the Scrape.do API response itself, independent of the headers of the request sent to the target site.
*/
export interface MakeRequestResponse extends Response {
data?: any;
+  sdoResponseHeaders?: Record<string, string>;
}
\ No newline at end of file
diff --git a/tests/lib.test.ts b/tests/lib.test.ts
index 1e654ef..c1411fa 100644
--- a/tests/lib.test.ts
+++ b/tests/lib.test.ts
@@ -1,9 +1,33 @@
import { describe, expect, it, test } from "@jest/globals";
import { ScrapeDo } from "../src/lib";
+import 'dotenv/config'
const TOKEN = process.env.TOKEN || "";
describe("Usability tests", () => {
+
+ test("Should contain scrape.do-prefixed headers in the response header", async () => {
+ const client = new ScrapeDo(TOKEN);
+ const response = await client.sendRequest("GET", {
+ url: "https://httpbin.co/anything"
+ });
+
+ expect(response.statusCode).toBe(200);
+ expect(response).toHaveProperty('requestCost')
+ expect(response.requestCost).toBeDefined();
+ })
+
+ test("Should handle HTML responses without errors", async () => {
+ const client = new ScrapeDo(TOKEN);
+ const response = await client.sendRequest("GET", {
+ url: "https://httpbin.co/html"
+ });
+
+ expect(response.statusCode).toBe(200);
+ expect(response).toHaveProperty('content')
+ expect(response.content).toContain(" {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
@@ -16,6 +40,7 @@ describe("Usability tests", () => {
expect(response.statusCode).toBe(200);
expect(response.content.headers["A123"]).toStrictEqual(["Extra Header"]);
});
+
test("Should be able to get successful response with custom headers", async () => {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
@@ -28,6 +53,7 @@ describe("Usability tests", () => {
expect(response.statusCode).toBe(200);
expect(response.content.headers["A123"]).toStrictEqual(["Custom Header"]);
});
+
test("Should be able to get successful response with forward headers", async () => {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
@@ -40,6 +66,7 @@ describe("Usability tests", () => {
expect(response.statusCode).toBe(200);
expect(response.content.headers["A123"]).toStrictEqual(["Forward Header"]);
});
+
test("Should be able to get successful response with cookies", async () => {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
@@ -52,6 +79,7 @@ describe("Usability tests", () => {
expect(response.statusCode).toBe(200);
expect(response.content.headers["Cookie"]).toStrictEqual(["A123=Cookie"]);
});
+
test("Should throw error if setCookies is used with customHeaders", async () => {
const client = new ScrapeDo(TOKEN);
await expect(
@@ -66,25 +94,41 @@ describe("Usability tests", () => {
})
).rejects.toThrow("setCookies cannot be used with customHeaders, extraHeaders or forwardHeaders");
});
+
test("Should get successful response with render and playWithBrowser", async () => {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
- url: "https://httpbin.co/anything",
+ url: "https://httpbin.co",
render: true,
returnJSON: true,
playWithBrowser: [
{
Action: "WaitSelector",
- WaitSelector: "body",
+ WaitSelector: "a code",
+ },
+ {
+ Action: "ScrollY",
+ Value: 100
},
+ {
+ Action: "Wait",
+ Timeout: 1000
+ },
+ {
+ Action: "Execute",
+ Execute: "window.scrollY"
+ }
],
});
expect(response.statusCode).toBe(200);
- expect(response.actionResults).toHaveLength(1);
- expect(response.actionResults![0].action).toBe("WaitSelector");
- expect(response.actionResults![0].success).toBe(true);
+ expect(response.actionResults).toHaveLength(4);
+ expect(response.actionResults?.at(-1)?.action).toBe("Execute");
+ expect(response.actionResults?.at(-1)?.success).toBe(true);
+    // Since the scroll action runs asynchronously, window.scrollY may not be exactly 100 yet, so we only check that it is greater than 0.
+ expect(response.actionResults?.at(-1)?.response).toBeGreaterThan(0);
});
+
test("Should get successful response with render and super proxy", async () => {
const client = new ScrapeDo(TOKEN);
const response = await client.sendRequest("GET", {
@@ -95,6 +139,7 @@ describe("Usability tests", () => {
expect(response.statusCode).toBe(200);
});
+
test("Should get successful response from statistics request", async () => {
const client = new ScrapeDo(TOKEN);
const stats = await client.statistics();
@@ -102,4 +147,5 @@ describe("Usability tests", () => {
expect(stats.IsActive).toBe(true);
expect(stats.RemainingMonthlyRequest).toBeGreaterThan(0);
});
+
});
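With this patch applied, the raw Scrape.do response headers ride along on every sendRequest result. A minimal sketch of reading them — not part of the patch, assuming a valid `TOKEN` and that the API returns extra headers such as server-timing or scrape.do-rid:

```typescript
import "dotenv/config";
import { ScrapeDo } from "@scrape-do/client";

async function main() {
  const client = new ScrapeDo(process.env.TOKEN || "");
  const response = await client.sendRequest("GET", {
    url: "https://httpbin.co/anything",
  });

  // Convenience fields parsed from the scrape.do-* response headers:
  console.log(response.requestCost, response.remainingCredits);

  // Raw headers captured via Object.fromEntries(response.headers);
  // values like server-timing or scrape.do-rid appear here when present.
  console.log(response.sdoResponseHeaders?.["server-timing"]);
}

main().catch(console.error);
```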
From 7e39f38ef0f8e3b2339e9028454c90f1b4b01dd5 Mon Sep 17 00:00:00 2001
From: zfcsoftware
Date: Wed, 13 Nov 2024 10:13:20 +0300
Subject: [PATCH 4/4] Some improvements to the README file
---
README.md | 43 +++++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index 8c63d83..0f9c83a 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,14 @@
 <div align="center">
   <h1>Scrape Do Node Client</h1>
-  <p>Scrape.do's official http client for node.js</p>
+  <p>Get unblocked while scraping the web - we bypass anti-bots and rotate proxies while you only pay for successful requests.</p>
 </div>
## How to install?
@@ -15,14 +22,16 @@ or install with github
npm install git://git@github.com/scrape-do/node-client
```
-## How to build from scratch
-If you want to contribute to the library or include your own customisations, you can rebuild the library as follows.
+## How Do I Import the Library?
-```bash
-git clone https://github.com/scrape-do/node-client
-npm i
-# build with
-npm run build
+```js
+// CommonJS
+const { ScrapeDo } = require("@scrape-do/client");
+```
+
+```typescript
+// Module - TypeScript
+import { ScrapeDo } from '@scrape-do/client'
```
## Example Usages
@@ -200,8 +209,11 @@ console.log(stats);
In this example, multiple parameters are combined to showcase advanced scraping capabilities. By using a combination of render, super, geoCode, and playWithBrowser, you can perform complex scraping tasks that require JavaScript execution, residential proxies, geographical targeting, and interactive browser actions:
-- [render: true](https://scrape.do/documentation/#js-render?utm_source=github&utm_medium=node-client): Enables JavaScript execution to fully render the webpage, allowing for the scraping of dynamic content that relies on client-side scripting.
-- [super: true](https://scrape.do/documentation/#super-residential--mobile?utm_source=github&utm_medium=node-client): Utilizes a residential proxy, which makes the request appear as if it is coming from a typical user on a mobile network, providing enhanced anonymity and avoiding blocks from anti-scraping measures.
+> [!WARNING]
+> The browser created with this endpoint can be detected. Use it for simple tasks in your scraping flow, such as waiting for the page to load or interacting with page elements.
+
+- [render](https://scrape.do/documentation/#js-render?utm_source=github&utm_medium=node-client): Enables JavaScript execution to fully render the webpage, allowing for the scraping of dynamic content that relies on client-side scripting.
+- [super](https://scrape.do/documentation/#super-residential--mobile?utm_source=github&utm_medium=node-client): Utilizes a residential proxy, which makes the request appear as if it is coming from a typical user on a mobile network, providing enhanced anonymity and avoiding blocks from anti-scraping measures.
- [geoCode](https://scrape.do/documentation/#geo-targeting?utm_source=github&utm_medium=node-client): "us": Targets a specific geographic location for the request, in this case, the United States. This is useful for scraping content that varies by region, such as localized prices or region-specific data.
- [playWithBrowser](https://scrape.do/documentation/#play-with-browser?utm_source=github&utm_medium=node-client): Provides the ability to interact with the browser while rendering the page. For example, you can wait for specific elements to load or perform actions like clicking buttons. In this case, it waits for the element to ensure the page is fully loaded before proceeding.
@@ -225,6 +237,17 @@ const response = await client.sendRequest("GET", {
console.log(response);
```
+## How to build from scratch
+If you want to contribute to the library or include your own customisations, you can rebuild the library as follows.
+
+```bash
+git clone https://github.com/scrape-do/node-client
+npm i
+# build with
+npm run build
+```
+
+
## Official links
- [Scrape.do](https://scrape.do?utm_source=github&utm_medium=node-client)