Skip to content

Commit 2cc3ff9

Browse files
authored
Merge pull request #76 from scrapinghub/metadata-normalize
Normalize the spider params schema.
2 parents 37ec9fa + 842716a commit 2cc3ff9

File tree

3 files changed

+95 -2 lines changed

sh_scrapy/commands/shub_image_info.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def run(self, args, opts):
45 45
result['metadata'] = {}
46 46
for spider_name in result['spiders']:
47 47
spider_cls = self.crawler_process.spider_loader.load(spider_name)
48-
metadata_dict = get_spider_metadata(spider_cls)
48+
metadata_dict = get_spider_metadata(spider_cls, normalize=True)
49 49
try:
50 50
# make sure it's serializable
51 51
json.dumps(metadata_dict)

tests/test_crawl.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,3 +343,95 @@ class MySpider(Spider):
343 343
if not SPIDER_METADATA_AVAILABLE:
344 344
del expected["metadata"]
345 345
assert data == expected
346+
347+
348+
@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed")
349+
def test_image_info_args(tmp_path):
350+
project_dir = create_project(tmp_path, spider_text="""
351+
from enum import Enum
352+
from scrapy import Spider
353+
from scrapy_spider_metadata import Args
354+
from pydantic import BaseModel, Field
355+
356+
class ToolEnum(Enum):
357+
spanner = "spanner"
358+
wrench = "wrench"
359+
360+
class Parameters(BaseModel):
361+
tool: ToolEnum = ToolEnum.spanner
362+
363+
class MySpider(Args[Parameters], Spider):
364+
name = "myspider"
365+
""")
366+
out, _ = call_command(project_dir, "shub-image-info")
367+
data = json.loads(out)
368+
expected = {
369+
"project_type": "scrapy",
370+
"spiders": ["myspider"],
371+
"metadata": {
372+
"myspider": {
373+
"param_schema": {
374+
"properties": {
375+
"tool": {
376+
"default": "spanner",
377+
"enum": ["spanner", "wrench"],
378+
"title": "Tool",
379+
"type": "string",
380+
},
381+
},
382+
"title": "Parameters",
383+
"type": "object",
384+
},
385+
},
386+
},
387+
}
388+
if not SPIDER_METADATA_AVAILABLE:
389+
del expected["metadata"]
390+
assert data == expected
391+
392+
393+
@pytest.mark.skipif(not SPIDER_METADATA_AVAILABLE, reason="scrapy-spider-metadata is not installed")
394+
def test_image_info_args_metadata(tmp_path):
395+
project_dir = create_project(tmp_path, spider_text="""
396+
from enum import Enum
397+
from scrapy import Spider
398+
from scrapy_spider_metadata import Args
399+
from pydantic import BaseModel, Field
400+
401+
class ToolEnum(Enum):
402+
spanner = "spanner"
403+
wrench = "wrench"
404+
405+
class Parameters(BaseModel):
406+
tool: ToolEnum = ToolEnum.spanner
407+
408+
class MySpider(Args[Parameters], Spider):
409+
name = "myspider"
410+
metadata = {"foo": 42}
411+
""")
412+
out, _ = call_command(project_dir, "shub-image-info")
413+
data = json.loads(out)
414+
expected = {
415+
"project_type": "scrapy",
416+
"spiders": ["myspider"],
417+
"metadata": {
418+
"myspider": {
419+
"foo": 42,
420+
"param_schema": {
421+
"properties": {
422+
"tool": {
423+
"default": "spanner",
424+
"enum": ["spanner", "wrench"],
425+
"title": "Tool",
426+
"type": "string",
427+
},
428+
},
429+
"title": "Parameters",
430+
"type": "object",
431+
},
432+
},
433+
},
434+
}
435+
if not SPIDER_METADATA_AVAILABLE:
436+
del expected["metadata"]
437+
assert data == expected

tox.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ deps =
10 10
hubstorage
11 11
packaging
12 12
py36-scrapy16: Scrapy==1.6
13-
scrapy-spider-metadata; python_version >= "3.8"
13+
scrapy-spider-metadata>=0.1.1; python_version >= "3.8"
14+
pydantic>=2; python_version >= "3.8"
14 15

15 16
commands =
16 17
pytest --verbose --cov=sh_scrapy --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: sh_scrapy tests}

0 commit comments

Comments
 (0)