Skip to content

Commit 3970f79

Browse files
author
Harry
committed
fix: handle unexpected jsonld format
1 parent 09d7f47 commit 3970f79

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

extruct/jsonld.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111

1212
from extruct.utils import parse_html
1313

14+
# Matches one comment at the very start of the text: optional leading
# whitespace followed by either a `//` JavaScript line comment or an HTML
# comment (`<!-- ... -->`) that opens and closes on the same line.
# The pattern is compiled without flags, so `^` anchors only at the start of
# the string (not at every line) and `.` does not cross newlines; comments
# further down the text are therefore left untouched.
HTML_OR_JS_COMMENTLINE = re.compile(r'^\s*(//.*|<!--.*-->)')
1415

1516

1617
class JsonLdExtractor(object):
@@ -34,7 +35,7 @@ def _extract_items(self, node):
3435
data = json.loads(script, strict=False)
3536
except ValueError:
3637
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
37-
data = jstyleson.loads(script, strict=False)
38+
data = jstyleson.loads(HTML_OR_JS_COMMENTLINE.sub('', script),strict=False)
3839
if isinstance(data, list):
3940
return data
4041
elif isinstance(data, dict):

0 commit comments

Comments
 (0)