Commit 1da76a6

- Added the ability to return the response headers from the request sent to scrape.do. This can be used when you need header values such as server-timing or scrape.do-rid that are not otherwise provided by the library.
- Added 2 new tests: one checks that the scrape.do header values are returned correctly, the other checks that an HTML response is handled without errors.
- The playWithBrowser test has been made more detailed.
1 parent a211a56 commit 1da76a6
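
For context, a minimal usage sketch of the new field (assumptions, not part of the diff: the relative import path and the httpbin.co URL mirror the tests below, TOKEN is set in .env, and the keys in sdoResponseHeaders are the lowercase header names):

import { ScrapeDo } from "../src/lib"; // import path assumed from the tests below
import "dotenv/config";

async function main() {
  const client = new ScrapeDo(process.env.TOKEN || "");
  const response = await client.sendRequest("GET", { url: "https://httpbin.co/anything" });

  // sdoResponseHeaders exposes the raw scrape.do response headers (lowercase keys),
  // e.g. server-timing or scrape.do-rid, which the typed fields do not cover.
  console.log(response.sdoResponseHeaders?.["server-timing"]);
  console.log(response.sdoResponseHeaders?.["scrape.do-rid"]);
  console.log(response.requestCost, response.remainingCredits);
}

main();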

File tree

6 files changed, +83 −12 lines changed


.env

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+# Only the TOKEN element is required for the test.
+TOKEN=<your-api-token>
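
Per the package.json and test changes further down, this token is expected to be loaded into process.env via dotenv before the tests read it; a minimal sketch of that pattern (the explicit guard is illustrative and not part of the diff):

// Loads .env into process.env so the test suite can read the token.
import "dotenv/config";

const TOKEN = process.env.TOKEN || "";
if (!TOKEN) {
  // Illustrative only; the test file itself simply falls back to an empty string.
  throw new Error("TOKEN must be set in .env to run the tests");
}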

package-lock.json

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
   },
   "dependencies": {
     "@types/qs": "^6.9.17",
+    "dotenv": "^16.4.5",
     "qs": "^6.13.0"
   }
 }

src/lib.ts

Lines changed: 12 additions & 7 deletions
@@ -59,6 +59,9 @@ export class ScrapeDo {
     try { data = JSON.parse(data) } catch { }

     response.data = data
+
+    // There may be information the caller wants from the returned headers; since this part is not critical, any error here is ignored.
+    try { response.sdoResponseHeaders = Object.fromEntries(response.headers); } catch { }

     if (ValidStatusCodes.includes(response.status) || ValidStatusCodeRanges.some((range) => response.status >= range.min && response.status <= range.max)) {
       return response
@@ -143,14 +146,16 @@ export class ScrapeDo {
         params,
       })
       .then((response: MakeRequestResponse) => {
+        // sdoResponseHeaders will be empty if Object.fromEntries failed above, so .get is used to read the individual header values.
         const sdoHeaders: Partial<DoResponse> = {
-          cookies: response.headers["Scrape.do-Cookies"],
-          remainingCredits: response.headers["Scrape.do-Remaining-Credits"],
-          requestCost: response.headers["Scrape.do-Request-Cost"],
-          resolvedURL: response.headers["Scrape.do-Resolved-Url"],
-          targetURL: response.headers["Scrape.do-Target-Url"],
-          initialStatusCode: response.headers["Scrape.do-Initial-Status-Code"],
-          targetRedirectedLocation: response.headers["Scrape.do-Target-Redirected-Location"],
+          cookies: response.headers.get("scrape.do-cookies")?.toString(),
+          remainingCredits: response.headers.get("scrape.do-remaining-credits")?.toString(),
+          requestCost: response.headers.get("scrape.do-request-cost")?.toString(),
+          resolvedURL: response.headers.get("scrape.do-resolved-url")?.toString(),
+          targetURL: response.headers.get("scrape.do-target-url")?.toString(),
+          initialStatusCode: response.headers.get("scrape.do-initial-status-code")?.toString(),
+          targetRedirectedLocation: response.headers.get("scrape.do-target-redirected-location")?.toString(),
+          sdoResponseHeaders: response.sdoResponseHeaders
         };

         if (options.returnJSON) {
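
The comment in the hunk above notes that .get is used to read the header values; as a side note, a small standalone sketch (not part of the diff, assuming the global fetch Headers available in Node 18+) of the Headers behaviour this relies on:

// The fetch Headers class normalizes header names to lowercase,
// so .get() lookups are case-insensitive while bracket indexing is not.
const headers = new Headers({ "Scrape.do-Request-Cost": "1" });

console.log(headers.get("Scrape.do-Request-Cost"));      // "1"
console.log(headers.get("scrape.do-request-cost"));      // "1" (case-insensitive)
console.log((headers as any)["Scrape.do-Request-Cost"]); // undefined - Headers is not a plain object
console.log(Object.fromEntries(headers));                // { "scrape.do-request-cost": "1" }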

src/types.ts

Lines changed: 4 additions & 0 deletions
@@ -115,6 +115,7 @@ export type DoResponse = {
  * @property {string} [targetURL] - The target URL that was scraped
  * @property {string} [initialStatusCode] - The status code of the initial request before any redirects
  * @property {string} [targetRedirectedLocation] - Final location after following redirects
+ * @property {Record<string, string>} [sdoResponseHeaders] - Additional headers returned in the response
  */
 export interface DoHeaders {
   cookies?: string;
@@ -124,6 +125,7 @@ export interface DoHeaders {
   targetURL?: string;
   initialStatusCode?: string;
   targetRedirectedLocation?: string;
+  sdoResponseHeaders?: Record<string, string>;
 }

 /**
@@ -195,8 +197,10 @@ export interface FetchConfig {
 /**
  * Response structure for a fetch request.
  * @property {any} data - Data returned from the request.
+ * @property {Record<string, string>} sdoResponseHeaders - The headers returned by the request sent to Scrape.do, independent of the headers in the request sent to the target site.
  * @property {Response} response - Response object from the fetch request.
  */
 export interface MakeRequestResponse extends Response {
   data?: any;
+  sdoResponseHeaders?: Record<string, string>;
 }

tests/lib.test.ts

Lines changed: 51 additions & 5 deletions
@@ -1,9 +1,33 @@
 import { describe, expect, it, test } from "@jest/globals";
 import { ScrapeDo } from "../src/lib";
+import 'dotenv/config'

 const TOKEN = process.env.TOKEN || "";

 describe("Usability tests", () => {
+
+  test("Should contain scrape.do-prefixed headers in the response header", async () => {
+    const client = new ScrapeDo(TOKEN);
+    const response = await client.sendRequest("GET", {
+      url: "https://httpbin.co/anything"
+    });
+
+    expect(response.statusCode).toBe(200);
+    expect(response).toHaveProperty('requestCost')
+    expect(response.requestCost).toBeDefined();
+  })
+
+  test("Should handle HTML responses without errors", async () => {
+    const client = new ScrapeDo(TOKEN);
+    const response = await client.sendRequest("GET", {
+      url: "https://httpbin.co/html"
+    });
+
+    expect(response.statusCode).toBe(200);
+    expect(response).toHaveProperty('content')
+    expect(response.content).toContain("<html");
+  })
+
   test("Should be able to get successful response with extra headers", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
@@ -16,6 +40,7 @@ describe("Usability tests", () => {
     expect(response.statusCode).toBe(200);
     expect(response.content.headers["A123"]).toStrictEqual(["Extra Header"]);
   });
+
   test("Should be able to get successful response with custom headers", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
@@ -28,6 +53,7 @@ describe("Usability tests", () => {
     expect(response.statusCode).toBe(200);
     expect(response.content.headers["A123"]).toStrictEqual(["Custom Header"]);
   });
+
   test("Should be able to get successful response with forward headers", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
@@ -40,6 +66,7 @@ describe("Usability tests", () => {
     expect(response.statusCode).toBe(200);
     expect(response.content.headers["A123"]).toStrictEqual(["Forward Header"]);
   });
+
   test("Should be able to get successful response with cookies", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
@@ -52,6 +79,7 @@ describe("Usability tests", () => {
     expect(response.statusCode).toBe(200);
     expect(response.content.headers["Cookie"]).toStrictEqual(["A123=Cookie"]);
   });
+
   test("Should throw error if setCookies is used with customHeaders", async () => {
     const client = new ScrapeDo(TOKEN);
     await expect(
@@ -66,25 +94,41 @@ describe("Usability tests", () => {
       })
     ).rejects.toThrow("setCookies cannot be used with customHeaders, extraHeaders or forwardHeaders");
   });
+
   test("Should get successful response with render and playWithBrowser", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
-      url: "https://httpbin.co/anything",
+      url: "https://httpbin.co",
       render: true,
       returnJSON: true,
       playWithBrowser: [
         {
           Action: "WaitSelector",
-          WaitSelector: "body",
+          WaitSelector: "a code",
+        },
+        {
+          Action: "ScrollY",
+          Value: 100
         },
+        {
+          Action: "Wait",
+          Timeout: 1000
+        },
+        {
+          Action: "Execute",
+          Execute: "window.scrollY"
+        }
       ],
     });

     expect(response.statusCode).toBe(200);
-    expect(response.actionResults).toHaveLength(1);
-    expect(response.actionResults![0].action).toBe("WaitSelector");
-    expect(response.actionResults![0].success).toBe(true);
+    expect(response.actionResults).toHaveLength(4);
+    expect(response.actionResults?.at(-1)?.action).toBe("Execute");
+    expect(response.actionResults?.at(-1)?.success).toBe(true);
+    // Since the scroll happens asynchronously, the value may not be exactly 100, so it is only checked to be greater than 0.
+    expect(response.actionResults?.at(-1)?.response).toBeGreaterThan(0);
   });
+
   test("Should get successful response with render and super proxy", async () => {
     const client = new ScrapeDo(TOKEN);
     const response = await client.sendRequest("GET", {
@@ -95,11 +139,13 @@ describe("Usability tests", () => {

     expect(response.statusCode).toBe(200);
   });
+
   test("Should get successful response from statistics request", async () => {
     const client = new ScrapeDo(TOKEN);
     const stats = await client.statistics();

     expect(stats.IsActive).toBe(true);
     expect(stats.RemainingMonthlyRequest).toBeGreaterThan(0);
   });
+
 });
