diff --git a/Dockerfile b/Dockerfile index 7f7f3ba..4f46e9e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,7 @@ EOT COPY bin bin COPY lib lib +COPY conf conf COPY *.conf . ENV PERL5LIB="/app/local/lib/perl5:/app/lib" PATH="/app/local/bin:${PATH}" diff --git a/bin/backup.pl b/bin/backup.pl index 7fefd28..6367028 100644 --- a/bin/backup.pl +++ b/bin/backup.pl @@ -33,7 +33,7 @@ ); # setup -my $home = path( home() ); +my $home = home(); run_restore() if $restore; run_purge() if $purge; diff --git a/bin/cpan_testers.pl b/bin/cpan_testers.pl index 04530d9..4ceaf79 100644 --- a/bin/cpan_testers.pl +++ b/bin/cpan_testers.pl @@ -23,8 +23,6 @@ ); # setup - -# XXX fix hardcoded path my $home = home(); my $db diff --git a/bin/mapping.pl b/bin/mapping.pl new file mode 100644 index 0000000..830442c --- /dev/null +++ b/bin/mapping.pl @@ -0,0 +1,32 @@ +use strict; +use warnings; + +use Cpanel::JSON::XS qw< decode_json >; +use Getopt::Long; +use MetaCPAN::Mapper; +use MetaCPAN::Ingest qw< home >; + +my ( $index, $cmd ); +GetOptions( + "index=s" => \$index, + "cmd=s" => \$cmd, +); +die "cmd can only be one of: 'create', 'delete'\n" + unless grep { $cmd eq $_ } qw< create delete >; + +# setup +my $type = $index; + +my $mapper = MetaCPAN::Mapper->new(); + +$mapper->index_delete($index) + if $mapper->index_exists($index); + +if ( $cmd eq 'create' ) { + my $home = home(); + my $map_file = $home->child('conf/es/' . $index . '/mapping.json'); + my $mapping = decode_json $map_file->slurp(); + + $mapper->index_create($index); + $mapper->index_put_mapping($index, $type, $mapping); +} diff --git a/conf/es/account/mapping.json b/conf/es/account/mapping.json new file mode 100644 index 0000000..0b4e925 --- /dev/null +++ b/conf/es/account/mapping.json @@ -0,0 +1,45 @@ +{ + "dynamic": false, + "properties": { + "access_token": { + "dynamic": true, + "properties": { + "client": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "token": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "code": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "identity": { + "dynamic": false, + "properties": { + "key": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + } + } +} diff --git a/conf/es/account/settings.json b/conf/es/account/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/account/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/author/mapping.json b/conf/es/author/mapping.json new file mode 100644 index 0000000..ad9278a --- /dev/null +++ b/conf/es/author/mapping.json @@ -0,0 +1,157 @@ +{ + "dynamic": false, + "properties": { + "asciiname": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "blog": { + "dynamic": true, + "properties": { + "feed": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "city": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "country": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "donation": { + "dynamic": true, + "properties": { + "id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "email": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "gravatar_url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "is_pause_custodial_account": { + "type": "boolean" + }, + "location": { + "type": "geo_point" + }, + "name": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "pauseid": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "perlmongers": { + "dynamic": true, + "properties": { + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "profile": { + "dynamic": false, + "include_in_root": true, + "properties": { + "id": { + "fields": { + "analyzed": { + "analyzer": "simple", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + }, + "type": "nested" + }, + "region": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "updated": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "user": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "website": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/author/settings.json b/conf/es/author/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/author/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/contributor/mapping.json b/conf/es/contributor/mapping.json new file mode 100644 index 0000000..0405a44 --- /dev/null +++ b/conf/es/contributor/mapping.json @@ -0,0 +1,35 @@ +{ + "dynamic": false, + "properties": { + "distribution": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "pauseid": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "email": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "release_author": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "release_name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/contributor/settings.json b/conf/es/contributor/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/contributor/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/cover/mapping.json b/conf/es/cover/mapping.json new file mode 100644 index 0000000..e0a6a61 --- /dev/null +++ b/conf/es/cover/mapping.json @@ -0,0 +1,40 @@ +{ + "dynamic": false, + "properties": { + "criteria": { + "dynamic": true, + "properties": { + "branch": { + "type": "float" + }, + "condition": { + "type": "float" + }, + "statement": { + "type": "float" + }, + "subroutine": { + "type": "float" + }, + "total": { + "type": "float" + } + } + }, + "distribution": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "release": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/cover/settings.json b/conf/es/cover/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/cover/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/cve/mapping.json b/conf/es/cve/mapping.json new file mode 100644 index 0000000..4f34920 --- /dev/null +++ b/conf/es/cve/mapping.json @@ -0,0 +1,41 @@ +{ + "dynamic": false, + "properties": { + "affected_versions": { + "type": "string" + }, + "cpansa_id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "cves": { + "type": "string" + }, + "description": { + "type": "string" + }, + "distribution": { + "index": "not_analyzed", + "type": "string" + }, + "references": { + "type": "string" + }, + "releases": { + "index": "not_analyzed", + "type": "string" + }, + "reported": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "severity": { + "type": "string" + }, + "versions": { + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/cve/settings.json b/conf/es/cve/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/cve/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/distribution/mapping.json b/conf/es/distribution/mapping.json new file mode 100644 index 0000000..333927a --- /dev/null +++ b/conf/es/distribution/mapping.json @@ -0,0 +1,121 @@ +{ + "dynamic": false, + "properties": { + "bugs": { + "dynamic": true, + "properties": { + "github": { + "dynamic": true, + "properties": { + "active": { + "type": "integer" + }, + "closed": { + "type": "integer" + }, + "open": { + "type": "integer" + }, + "source": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "rt": { + "dynamic": true, + "properties": { + "active": { + "type": "integer" + }, + "closed": { + "type": "integer" + }, + "new": { + "type": "integer" + }, + "open": { + "type": "integer" + }, + "patched": { + "type": "integer" + }, + "rejected": { + "type": "integer" + }, + "resolved": { + "type": "integer" + }, + "source": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "stalled": { + "type": "integer" + } + } + } + } + }, + "external_package": { + "dynamic": true, + "properties": { + "cygwin": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "debian": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "fedora": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "repo": { + "dynamic": true, + "properties": { + "github": { + "dynamic": true, + "properties": { + "stars": { + "type": "integer" + }, + "watchers": { + "type": "integer" + } + } + } + } + }, + "river": { + "dynamic": true, + "properties": { + "bucket": { + "type": "integer" + }, + "bus_factor": { + "type": "integer" + }, + "immediate": { + "type": "integer" + }, + "total": { + "type": "integer" + } + } + } + } +} diff --git a/conf/es/distribution/settings.json b/conf/es/distribution/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/distribution/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/favorite/mapping.json b/conf/es/favorite/mapping.json new file mode 100644 index 0000000..aeff5b2 --- /dev/null +++ b/conf/es/favorite/mapping.json @@ -0,0 +1,34 @@ +{ + "dynamic": false, + "properties": { + "author": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "date": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "distribution": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "release": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "user": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/favorite/settings.json b/conf/es/favorite/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/favorite/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/file/mapping.json b/conf/es/file/mapping.json new file mode 100644 index 0000000..5374d9f --- /dev/null +++ b/conf/es/file/mapping.json @@ -0,0 +1,293 @@ +{ + "dynamic": false, + "properties": { + "abstract": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "author": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "authorized": { + "type": "boolean" + }, + "binary": { + "type": "boolean" + }, + "date": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "deprecated": { + "type": "boolean" + }, + "description": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "dir": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "directory": { + "type": "boolean" + }, + "dist_fav_count": { + "type": "integer" + }, + "distribution": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "documentation": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "edge": { + "analyzer": "edge", + "store": true, + "type": "string" + }, + "edge_camelcase": { + "analyzer": "edge_camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "documentation_length": { + "type": "integer" + }, + "download_url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "indexed": { + "type": "boolean" + }, + "level": { + "type": "integer" + }, + "maturity": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "mime": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "module": { + "dynamic": false, + "include_in_root": true, + "properties": { + "associated_pod": { + "type": "string" + }, + "authorized": { + "type": "boolean" + }, + "indexed": { + "type": "boolean" + }, + "name": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version_numified": { + "type": "float" + } + }, + "type": "nested" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "path": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "pod": { + "analyzer": "standard", + "doc_values": false, + "fields": { + "analyzed": { + "analyzer": "standard", + "doc_values": false, + "index": "analyzed", + "type": "string" + } + }, + "index": "analyzed", + "type": "string" + }, + "pod_lines": { + "doc_values": true, + "ignore_above": 2048, + "index": "no", + "type": "string" + }, + "release": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "sloc": { + "type": "integer" + }, + "slop": { + "type": "integer" + }, + "stat": { + "dynamic": true, + "properties": { + "gid": { + "type": "long" + }, + "mode": { + "type": "integer" + }, + "mtime": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "uid": { + "type": "long" + } + } + }, + "status": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "suggest": { + "analyzer": "simple", + "max_input_length": 50, + "preserve_position_increments": true, + "preserve_separators": true, + "type": "completion" + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version_numified": { + "type": "float" + } + } +} diff --git a/conf/es/file/settings.json b/conf/es/file/settings.json new file mode 100644 index 0000000..1d48454 --- /dev/null +++ b/conf/es/file/settings.json @@ -0,0 +1,56 @@ +{ + "analysis": { + "analyzer": { + "camelcase": { + "filter": [ + "lowercase", + "unique" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "edge": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "standard", + "type": "custom" + }, + "edge_camelcase": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "fulltext": { + "type": "english" + }, + "lowercase": { + "filter": "lowercase", + "tokenizer": "keyword" + } + }, + "filter": { + "edge": { + "max_gram": "20", + "min_gram": "1", + "type": "edge_ngram" + } + }, + "tokenizer": { + "camelcase": { + "pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", + "type": "pattern" + } + } + }, + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/mirror/mapping.json b/conf/es/mirror/mapping.json new file mode 100644 index 0000000..09f2d85 --- /dev/null +++ b/conf/es/mirror/mapping.json @@ -0,0 +1,166 @@ +{ + "dynamic": false, + "properties": { + "A_or_CNAME": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "aka_name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "ccode": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "city": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "contact": { + "dynamic": false, + "properties": { + "contact_site": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "contact_user": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } + }, + "continent": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "country": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "dnsrr": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "freq": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "ftp": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "http": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "inceptdate": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "location": { + "type": "geo_point" + }, + "name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "note": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "org": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "region": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "reitredate": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "rsync": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "src": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "tz": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/mirror/settings.json b/conf/es/mirror/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/mirror/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/package/mapping.json b/conf/es/package/mapping.json new file mode 100644 index 0000000..82dacee --- /dev/null +++ b/conf/es/package/mapping.json @@ -0,0 +1,35 @@ +{ + "dynamic": false, + "properties": { + "author": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "dist_version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "distribution": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "file": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "module_name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/package/settings.json b/conf/es/package/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/package/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/permission/mapping.json b/conf/es/permission/mapping.json new file mode 100644 index 0000000..688f56b --- /dev/null +++ b/conf/es/permission/mapping.json @@ -0,0 +1,20 @@ +{ + "dynamic": false, + "properties": { + "co_maintainers": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "module_name": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "owner": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + } +} diff --git a/conf/es/permission/settings.json b/conf/es/permission/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/permission/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/release/mapping.json b/conf/es/release/mapping.json new file mode 100644 index 0000000..fb75ac8 --- /dev/null +++ b/conf/es/release/mapping.json @@ -0,0 +1,271 @@ +{ + "dynamic": false, + "properties": { + "abstract": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "archive": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "author": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "authorized": { + "type": "boolean" + }, + "changes_file": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "checksum_md5": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "checksum_sha256": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "date": { + "format": "strict_date_optional_time||epoch_millis", + "type": "date" + }, + "dependency": { + "dynamic": false, + "include_in_root": true, + "properties": { + "module": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "phase": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "relationship": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + }, + "type": "nested" + }, + "deprecated": { + "type": "boolean" + }, + "distribution": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "download_url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "first": { + "type": "boolean" + }, + "id": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "license": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "main_module": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "maturity": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "name": { + "fields": { + "analyzed": { + "analyzer": "standard", + "fielddata": { + "format": "disabled" + }, + "store": true, + "type": "string" + }, + "camelcase": { + "analyzer": "camelcase", + "store": true, + "type": "string" + }, + "lowercase": { + "analyzer": "lowercase", + "store": true, + "type": "string" + } + }, + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "provides": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "resources": { + "dynamic": true, + "include_in_root": true, + "properties": { + "bugtracker": { + "dynamic": "true", + "include_in_root": true, + "properties": { + "mailto": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "web": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + }, + "type": "nested" + }, + "homepage": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "license": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "repository": { + "dynamic": "true", + "include_in_root": true, + "properties": { + "type": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "url": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "web": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + } + }, + "type": "nested" + } + }, + "type": "nested" + }, + "stat": { + "dynamic": true, + "properties": { + "gid": { + "type": "long" + }, + "mode": { + "type": "integer" + }, + "mtime": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "uid": { + "type": "long" + } + } + }, + "status": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "tests": { + "dynamic": true, + "properties": { + "fail": { + "type": "integer" + }, + "na": { + "type": "integer" + }, + "pass": { + "type": "integer" + }, + "unknown": { + "type": "integer" + } + } + }, + "version": { + "ignore_above": 2048, + "index": "not_analyzed", + "type": "string" + }, + "version_numified": { + "type": "float" + } + } +} diff --git a/conf/es/release/settings.json b/conf/es/release/settings.json new file mode 100644 index 0000000..1d48454 --- /dev/null +++ b/conf/es/release/settings.json @@ -0,0 +1,56 @@ +{ + "analysis": { + "analyzer": { + "camelcase": { + "filter": [ + "lowercase", + "unique" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "edge": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "standard", + "type": "custom" + }, + "edge_camelcase": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "fulltext": { + "type": "english" + }, + "lowercase": { + "filter": "lowercase", + "tokenizer": "keyword" + } + }, + "filter": { + "edge": { + "max_gram": "20", + "min_gram": "1", + "type": "edge_ngram" + } + }, + "tokenizer": { + "camelcase": { + "pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", + "type": "pattern" + } + } + }, + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/session/mapping.json b/conf/es/session/mapping.json new file mode 100644 index 0000000..d4f130c --- /dev/null +++ b/conf/es/session/mapping.json @@ -0,0 +1,3 @@ +{ + "dynamic": false +} diff --git a/conf/es/session/settings.json b/conf/es/session/settings.json new file mode 100644 index 0000000..c90ef6c --- /dev/null +++ b/conf/es/session/settings.json @@ -0,0 +1,8 @@ +{ + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/conf/es/settings.json b/conf/es/settings.json new file mode 100644 index 0000000..1d48454 --- /dev/null +++ b/conf/es/settings.json @@ -0,0 +1,56 @@ +{ + "analysis": { + "analyzer": { + "camelcase": { + "filter": [ + "lowercase", + "unique" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "edge": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "standard", + "type": "custom" + }, + "edge_camelcase": { + "filter": [ + "lowercase", + "edge" + ], + "tokenizer": "camelcase", + "type": "custom" + }, + "fulltext": { + "type": "english" + }, + "lowercase": { + "filter": "lowercase", + "tokenizer": "keyword" + } + }, + "filter": { + "edge": { + "max_gram": "20", + "min_gram": "1", + "type": "edge_ngram" + } + }, + "tokenizer": { + "camelcase": { + "pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])", + "type": "pattern" + } + } + }, + "mapper": { + "dynamic": "false" + }, + "number_of_replicas": 1, + "number_of_shards": 1, + "refresh_interval": "1s" +} diff --git a/lib/MetaCPAN/Ingest.pm b/lib/MetaCPAN/Ingest.pm index 019e059..07cc036 100644 --- a/lib/MetaCPAN/Ingest.pm +++ b/lib/MetaCPAN/Ingest.pm @@ -8,7 +8,8 @@ use Cpanel::JSON::XS; use Digest::SHA; use Encode qw< decode_utf8 >; use IO::Prompt::Tiny qw< prompt >; -use IPC::Run3 (); +use File::Basename (); +use File::Spec (); use LWP::UserAgent; use Path::Tiny qw< path >; use PAUSE::Permissions (); @@ -183,17 +184,14 @@ sub handle_error ( $exit_code, $error, $die_always ) { } sub home () { - IPC::Run3::run3( - [ qw< git rev-parse --show-toplevel > ], # TODO: use alternative persistent path that's accessible from the container - \undef, \my $stdout, \my $stderr - ); - - die $stderr if ($?); - - chomp $stdout; - die "Failed to find git dir: '$stdout'" unless -d $stdout; - - return $stdout; + my $dir = Cwd::abs_path( File::Spec->catdir( + File::Basename::dirname(__FILE__), + ( File::Spec->updir ) x 2 + ) ); + + my $path = path($dir); + die "Failed to find git dir: '$path'" unless $path; + return $path; } # TODO: there must be a better way diff --git a/lib/MetaCPAN/Mapper.pm b/lib/MetaCPAN/Mapper.pm new file mode 100644 index 0000000..190404a --- /dev/null +++ b/lib/MetaCPAN/Mapper.pm @@ -0,0 +1,52 @@ +package MetaCPAN::Mapper; + +use strict; +use warnings; +use v5.36; + +use Path::Tiny qw< path >; +use MetaCPAN::Logger qw< :log :dlog >; +use Search::Elasticsearch; +use MetaCPAN::Ingest qw< config home >; + +sub new ( $class, %args ) { + my $node = $args{node}; + + my $config = config; + $node ||= $config->{es_node}; + $node or die "Cannot create an ES instance without a node\n"; + + return bless { + es => Search::Elasticsearch->new( + client => '2_0::Direct', + nodes => [$node], + ), + }, $class; +} + +sub index_exists ($self, $index) { + $self->{es}->indices->exists( index => $index ); +} + +sub index_create ($self, $index) { + $self->{es}->indices->create( index => $index ); +} + +sub index_delete ($self, $index) { + $self->{es}->indices->delete( index => $index ); +} + +sub index_put_mapping ($self, $index, $type, $mapping) { + $self->{es}->indices->put_mapping( + index => $index, + type => $type, + body => $mapping, + ); +} + +sub get_mapping ($self, $index) { +# my $home = home(); +# my $file = $dir->child(''); +} + +1;