From b7ce20b2ca27adca8e5470952b838a20de30b7c9 Mon Sep 17 00:00:00 2001
From: Adam Kliment
Date: Thu, 2 Jan 2025 17:08:46 +0100
Subject: [PATCH] feat: Actor definition, Actor usage to README

---
Review notes (ignored by git-am, not part of the commit message):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. Hunk line counts and the diffstat match the bodies
   below; the blob ids on the "index" lines still describe the pre-review
   content. Padding of the docs/README.md context lines is a reconstruction.
 * Dockerfile: one apt layer chained with && and pipefail, duplicate package
   install removed, apt lists cleaned. Base image tag left as-is; pin it to a
   release tag once the desired Sherlock version is decided.
 * actor.sh: usernames kept in a bash array and every expansion quoted; the
   '%' escape is anchored to the leading character like '@' and ':'.
 * actor.json / dataset_schema.json: keys aligned with the Apify
   specification; invalid trailing comma removed.
 * READMEs: invalid JSON examples, API path and typos fixed.

 .actor/Dockerfile          | 24 ++++++++++
 .actor/README.md           | 92 ++++++++++++++++++++++++++++++++++++++
 .actor/actor.json          | 13 ++++++
 .actor/actor.sh            | 15 ++++++
 .actor/dataset_schema.json | 47 +++++++++++++++++++
 .actor/input_schema.json   | 18 ++++++++
 docs/README.md             | 18 ++++++++
 7 files changed, 227 insertions(+)
 create mode 100644 .actor/Dockerfile
 create mode 100644 .actor/README.md
 create mode 100644 .actor/actor.json
 create mode 100755 .actor/actor.sh
 create mode 100644 .actor/dataset_schema.json
 create mode 100644 .actor/input_schema.json

diff --git a/.actor/Dockerfile b/.actor/Dockerfile
new file mode 100644
index 000000000..3a6de091a
--- /dev/null
+++ b/.actor/Dockerfile
@@ -0,0 +1,24 @@
+FROM sherlock/sherlock AS sherlock
+
+# Make RUN fail when any command of a pipeline fails (see curl | gpg below)
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Install Node.js and the tools needed by the Actor shell script
+RUN apt-get update \
+    && apt-get install -y curl gpg \
+    && mkdir -p /etc/apt/keyrings \
+    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
+        | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
+    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" \
+        > /etc/apt/sources.list.d/nodesource.list \
+    && apt-get update \
+    && apt-get install -y bash git jq jo nodejs xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Apify CLI (node.js) for the Actor Runtime
+RUN npm -g install apify-cli
+
+# Copy Actor dir with the actorization shell script
+COPY .actor/ .actor
+
+ENTRYPOINT [".actor/actor.sh"]
diff --git a/.actor/README.md b/.actor/README.md
new file mode 100644
index 000000000..cea3f9da9
--- /dev/null
+++ b/.actor/README.md
@@ -0,0 +1,92 @@
+# Sherlock Actor on Apify
+
+[![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)
+
+This Actor wraps the [Sherlock Project](https://sherlockproject.xyz/) to provide serverless username reconnaissance across social networks in the cloud. It helps you find usernames across multiple social media platforms without installing and running the tool locally.
+
+## What are Actors?
+[Actors](https://docs.apify.com/platform/actors?fpr=sherlock) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=sherlock). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=sherlock) and can be found in the [Apify Store](https://apify.com/store?fpr=sherlock). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=sherlock).
+
+## Usage
+
+### Apify Console
+
+1. Go to the Apify Actor page
+2. Click "Run"
+3. In the input form, fill in **Username(s)** to search for
+4. The Actor will run and produce its outputs in the default dataset
+
+
+### Apify CLI
+
+```bash
+apify call YOUR_USERNAME/sherlock --input='{
+  "usernames": ["johndoe", "janedoe"]
+}'
+```
+
+### Using Apify API
+
+```bash
+curl --request POST \
+  --url "https://api.apify.com/v2/acts/YOUR_USERNAME~sherlock/runs" \
+  --header 'Content-Type: application/json' \
+  --header 'Authorization: Bearer YOUR_API_TOKEN' \
+  --data '{
+  "usernames": ["johndoe", "janedoe"]
+}'
+```
+
+## Input Parameters
+
+The Actor accepts a JSON input with the following structure:
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `usernames` | array | Yes | - | List of usernames to search for |
+| `usernames[]` | string | Yes | - | Username to search for |
+
+
+### Example Input
+
+```json
+{
+  "usernames": ["techuser", "designuser"]
+}
+```
+
+## Output
+
+The Actor stores its results in the default dataset, one record per searched username:
+
+### Dataset Record
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `username` | string | Yes | Username the search was conducted for |
+| `links` | array | Yes | Array with found links to the social media |
+| `links[]` | string | No | URL to the account |
+
+### Example Dataset Item (JSON)
+
+```json
+{
+  "username": "johndoe",
+  "links": [
+    "https://github.com/johndoe"
+  ]
+}
+```
+
+## Performance & Resources
+
+- **Memory Requirements**:
+  - Minimum: 512 MB RAM
+  - Recommended: 1 GB RAM for multiple usernames
+- **Processing Time**:
+  - Single username: ~1-2 minutes
+  - Multiple usernames: 2-5 minutes
+  - Varies based on number of sites checked and response times
+
+
+For more help, check the [Sherlock Project documentation](https://github.com/sherlock-project/sherlock) or raise an issue in the Actor's repository.
diff --git a/.actor/actor.json b/.actor/actor.json
new file mode 100644
index 000000000..754958d26
--- /dev/null
+++ b/.actor/actor.json
@@ -0,0 +1,13 @@
+{
+  "actorSpecification": 1,
+  "name": "sherlock",
+  "version": "0.0",
+  "buildTag": "latest",
+  "environmentVariables": {},
+  "dockerfile": "./Dockerfile",
+  "dockerContextDir": "../",
+  "input": "./input_schema.json",
+  "storages": {
+    "dataset": "./dataset_schema.json"
+  }
+}
diff --git a/.actor/actor.sh b/.actor/actor.sh
new file mode 100755
index 000000000..dec841bca
--- /dev/null
+++ b/.actor/actor.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Read the usernames from the Actor input, one array element per username
+mapfile -t USERNAMES < <(apify actor:get-input | jq -r '.usernames[]')
+echo "INPUT: ${USERNAMES[*]}"
+
+sherlock "${USERNAMES[@]}"
+
+for username in "${USERNAMES[@]}"; do
+  # escape the special meaning leading characters
+  # https://github.com/jpmens/jo/blob/master/jo.md#description
+  safe_username=$(printf '%s\n' "$username" | sed -e 's/^@/\\@/' -e 's/^:/\\:/' -e 's/^%/\\%/')
+  echo "pushing results for username: $username, content:"
+  cat "$username.txt"
+  sed '$d' "$username.txt" | jo -a | jo username="$safe_username" links:=- | apify actor:push-data
+done
diff --git a/.actor/dataset_schema.json b/.actor/dataset_schema.json
new file mode 100644
index 000000000..9edce2f87
--- /dev/null
+++ b/.actor/dataset_schema.json
@@ -0,0 +1,47 @@
+{
+  "actorSpecification": 1,
+  "fields": {
+    "title": "Sherlock actor dataset",
+    "description": "This is actor dataset schema",
+    "type": "object",
+    "schemaVersion": 1,
+    "properties": {
+      "links": {
+        "title": "Links to accounts",
+        "type": "array",
+        "description": "A list of social media accounts found for the username"
+      },
+      "username": {
+        "title": "Lookup username",
+        "type": "string",
+        "description": "Username the lookup was performed for"
+      }
+    },
+    "required": [
+      "username",
+      "links"
+    ]
+  },
+  "views": {
+    "overview": {
+      "title": "Overview",
+      "transformation": {
+        "fields": [
+          "username",
+          "links"
+        ]
+      },
+      "display": {
+        "component": "table",
+        "properties": {
+          "links": {
+            "label": "Links"
+          },
+          "username": {
+            "label": "Username"
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/.actor/input_schema.json b/.actor/input_schema.json
new file mode 100644
index 000000000..13f85b807
--- /dev/null
+++ b/.actor/input_schema.json
@@ -0,0 +1,18 @@
+{
+  "title": "Sherlock actor input",
+  "description": "This is actor input schema",
+  "type": "object",
+  "schemaVersion": 1,
+  "properties": {
+    "usernames": {
+      "title": "Usernames to hunt down",
+      "type": "array",
+      "description": "A list of usernames to be checked for existence across social media",
+      "editor": "stringList",
+      "prefill": ["johndoe"]
+    }
+  },
+  "required": [
+    "usernames"
+  ]
+}
diff --git a/docs/README.md b/docs/README.md
index 0fa75ecc5..954dd28b2 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -98,6 +98,24 @@ optional arguments:
   --local, -l           Force the use of the local data.json file.
   --nsfw                Include checking of NSFW sites from default list.
 ```
+## Apify Actor Usage [![Sherlock Actor](https://apify.com/actor-badge?actor=netmilk/sherlock)](https://apify.com/netmilk/sherlock?fpr=sherlock)
+
+Run Sherlock Actor on Apify
+
+You can run Sherlock in the cloud without installation using the [Sherlock Actor](https://apify.com/netmilk/sherlock?fpr=sherlock) on [Apify](https://apify.com?fpr=sherlock) free of charge.
+
+```bash
+$ echo '{"usernames":["user123"]}' | apify call -so netmilk/sherlock
+[{
+  "username": "user123",
+  "links": [
+    "https://www.1337x.to/user/user123/",
+    ...
+  ]
+}]
+```
+
+Read more about the [Sherlock Actor](../.actor/README.md), including how to use it programmatically via the Apify [API](https://apify.com/netmilk/sherlock/api?fpr=sherlock), [CLI](https://docs.apify.com/cli/?fpr=sherlock) and [JS/TS and Python SDKs](https://docs.apify.com/sdk?fpr=sherlock).
 
 ## Credits
 