From c8519b0edc842f30c1a60a055ef504bd62dddfef Mon Sep 17 00:00:00 2001 From: Michelle Bergeron Date: Fri, 1 Aug 2025 14:28:35 -0400 Subject: [PATCH 1/2] new components --- .../actions/scrape-serp/scrape-serp.mjs | 73 ++++++++++++ .../actions/scrape-website/scrape-website.mjs | 49 ++++++++ .../actions/unlock-website/unlock-website.mjs | 67 +++++++++++ components/bright_data/bright_data.app.mjs | 108 +++++++++++++++++- components/bright_data/package.json | 7 +- 5 files changed, 298 insertions(+), 6 deletions(-) create mode 100644 components/bright_data/actions/scrape-serp/scrape-serp.mjs create mode 100644 components/bright_data/actions/scrape-website/scrape-website.mjs create mode 100644 components/bright_data/actions/unlock-website/unlock-website.mjs diff --git a/components/bright_data/actions/scrape-serp/scrape-serp.mjs b/components/bright_data/actions/scrape-serp/scrape-serp.mjs new file mode 100644 index 0000000000000..ddfdc9d2ff253 --- /dev/null +++ b/components/bright_data/actions/scrape-serp/scrape-serp.mjs @@ -0,0 +1,73 @@ +import brightData from "../../bright_data.app.mjs"; +import { ConfigurationError } from "@pipedream/platform"; + +export default { + key: "bright_data-scrape-serp", + name: "Scrape SERP", + description: "Extract search engine results using Bright Data SERP API. [See the documentation](https://docs.brightdata.com/api-reference/rest-api/serp/scrape-serp)", + version: "0.0.1", + type: "action", + props: { + brightData, + url: { + propDefinition: [ + brightData, + "url", + ], + description: "Complete target URL to scrape. Must include protocol (http/https), be publicly accessible. Example: `https://www.google.com/search?q=pizza`", + }, + zone: { + propDefinition: [ + brightData, + "zone", + () => ({ + type: "serp", + }), + ], + }, + format: { + propDefinition: [ + brightData, + "format", + ], + }, + method: { + propDefinition: [ + brightData, + "method", + ], + }, + country: { + propDefinition: [ + brightData, + "country", + ], + }, + dataFormat: { + propDefinition: [ + brightData, + "dataFormat", + ], + }, + }, + async run({ $ }) { + const data = await this.brightData.requestWebsite({ + $, + data: { + url: this.url, + zone: this.zone, + format: this.format, + method: this.method, + country: this.country, + data_format: this.dataFormat, + }, + }); + + if (data.status_code === 400) { + throw new ConfigurationError(data.body); + } + + $.export("$summary", `Scraped SERP for ${this.url}`); + return data; + }, +}; diff --git a/components/bright_data/actions/scrape-website/scrape-website.mjs b/components/bright_data/actions/scrape-website/scrape-website.mjs new file mode 100644 index 0000000000000..21d4608ee0ce3 --- /dev/null +++ b/components/bright_data/actions/scrape-website/scrape-website.mjs @@ -0,0 +1,49 @@ +import brightData from "../../bright_data.app.mjs"; +import { ConfigurationError } from "@pipedream/platform"; + +export default { + key: "bright_data-scrape-website", + name: "Scrape Website", + description: "Scrape a website and return the HTML. [See the documentation](https://docs.brightdata.com/api-reference/web-scraper-api/synchronous-requests)", + version: "0.0.1", + type: "action", + props: { + brightData, + datasetId: { + propDefinition: [ + brightData, + "datasetId", + ], + }, + url: { + propDefinition: [ + brightData, + "url", + ], + description: "The URL of the website to scrape", + }, + }, + async run({ $ }) { + try { + const data = await this.brightData.scrapeWebsite({ + $, + params: { + dataset_id: this.datasetId, + }, + data: { + input: [ + { + url: this.url, + }, + ], + }, + }); + + $.export("$summary", `Scraped website ${this.url}`); + return data; + } catch (error) { + const errors = (JSON.parse(error.message)).errors; + throw new ConfigurationError(errors.map((e) => e.join(" - ")).join(" | ")); + } + }, +}; diff --git a/components/bright_data/actions/unlock-website/unlock-website.mjs b/components/bright_data/actions/unlock-website/unlock-website.mjs new file mode 100644 index 0000000000000..022c30c7d7fd8 --- /dev/null +++ b/components/bright_data/actions/unlock-website/unlock-website.mjs @@ -0,0 +1,67 @@ +import brightData from "../../bright_data.app.mjs"; + +export default { + key: "bright_data-unlock-website", + name: "Unlock Website", + description: "Send an API call to a URL and get the HTML back. Enables you to bypass anti-bot measures, manages proxies, and solves CAPTCHAs automatically for easier web data collection. [See the documentation](https://docs.brightdata.com/api-reference/rest-api/unlocker/unlock-website)", + version: "0.0.1", + type: "action", + props: { + brightData, + url: { + propDefinition: [ + brightData, + "url", + ], + }, + zone: { + propDefinition: [ + brightData, + "zone", + () => ({ + type: "unblocker", + }), + ], + }, + format: { + propDefinition: [ + brightData, + "format", + ], + }, + method: { + propDefinition: [ + brightData, + "method", + ], + }, + country: { + propDefinition: [ + brightData, + "country", + ], + }, + dataFormat: { + propDefinition: [ + brightData, + "dataFormat", + ], + }, + }, + async run({ $ }) { + const data = await this.brightData.requestWebsite({ + $, + data: { + url: this.url, + zone: this.zone, + format: this.format, + method: this.method, + country: this.country, + data_format: this.dataFormat, + }, + }); + + $.export("$summary", `Unlocked website ${this.url}`); + return data; + }, +}; diff --git a/components/bright_data/bright_data.app.mjs b/components/bright_data/bright_data.app.mjs index f0f75c0ae5df8..685d77adc0d8f 100644 --- a/components/bright_data/bright_data.app.mjs +++ b/components/bright_data/bright_data.app.mjs @@ -1,11 +1,111 @@ +import { axios } from "@pipedream/platform"; + export default { type: "app", app: "bright_data", - propDefinitions: {}, + propDefinitions: { + datasetId: { + type: "string", + label: "Dataset ID", + description: "The ID of the dataset to use", + async options() { + const datasets = await this.listDatasets(); + return datasets.map((dataset) => ({ + label: dataset.name, + value: dataset.id, + })); + }, + }, + zone: { + type: "string", + label: "Zone", + description: "Zone identifier that defines your Bright Data product configuration. Each zone contains targeting rules, output preferences, and access permissions. Manage zones at: https://brightdata.com/cp/zones", + async options({ type }) { + const zones = await this.listZones(); + return zones?.filter((zone) => zone.type === type)?.map(({ name }) => name) || []; + }, + }, + url: { + type: "string", + label: "URL", + description: "Complete target URL to scrape. Must include protocol (http/https), be publicly accessible.", + }, + format: { + type: "string", + label: "Format", + description: "Output format of the response", + options: [ + "json", + "raw", + ], + }, + method: { + type: "string", + label: "Method", + description: "HTTP method to use for the request", + options: [ + "GET", + "POST", + ], + optional: true, + }, + country: { + type: "string", + label: "Country", + description: "Two-letter ISO 3166-1 country code for proxy location", + optional: true, + }, + dataFormat: { + type: "string", + label: "Data Format", + description: "Additional response format transformation: `markdown` converts HTML content to clean markdown format, `screenshot` captures a PNG image of the rendered page.", + options: [ + "markdown", + "screenshot", + ], + optional: true, + }, + }, methods: { - // this.$auth contains connected account data - authKeys() { - console.log(Object.keys(this.$auth)); + _baseUrl() { + return "https://api.brightdata.com"; + }, + _makeRequest({ + $ = this, path, ...opts + }) { + return axios($, { + url: `${this._baseUrl()}${path}`, + headers: { + Authorization: `Bearer ${this.$auth.api_key}`, + }, + ...opts, + }); + }, + listDatasets(opts = {}) { + return this._makeRequest({ + path: "/datasets/list", + ...opts, + }); + }, + listZones(opts = {}) { + return this._makeRequest({ + path: "/zone/get_active_zones", + ...opts, + }); + }, + scrapeWebsite(opts = {}) { + return this._makeRequest({ + path: "/datasets/v3/scrape", + method: "POST", + ...opts, + }); + }, + requestWebsite(opts = {}) { + return this._makeRequest({ + path: "/request", + method: "POST", + ...opts, + }); }, }, }; diff --git a/components/bright_data/package.json b/components/bright_data/package.json index 55f14def56bad..38a7e9881c29e 100644 --- a/components/bright_data/package.json +++ b/components/bright_data/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/bright_data", - "version": "0.0.1", + "version": "0.1.0", "description": "Pipedream Bright Data Components", "main": "bright_data.app.mjs", "keywords": [ @@ -11,5 +11,8 @@ "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.1.0" } -} \ No newline at end of file +} From 570d1441a10b5966d2164bd1d8264c003b406771 Mon Sep 17 00:00:00 2001 From: Michelle Bergeron Date: Fri, 1 Aug 2025 14:29:46 -0400 Subject: [PATCH 2/2] pnpm-lock.yaml --- pnpm-lock.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a6e00927c63e9..9fd51f905448f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1901,7 +1901,11 @@ importers: specifier: ^3.0.0 version: 3.0.3 - components/bright_data: {} + components/bright_data: + dependencies: + '@pipedream/platform': + specifier: ^3.1.0 + version: 3.1.0 components/brilliant_directories: dependencies: @@ -37391,6 +37395,8 @@ snapshots: '@putout/operator-filesystem': 5.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3)) '@putout/operator-json': 2.2.0 putout: 36.13.1(eslint@8.57.1)(typescript@5.6.3) + transitivePeerDependencies: + - supports-color '@putout/operator-regexp@1.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))': dependencies: