Skip to content

Commit 58fd76b

Browse files
committed
changed the approach to updating the initial context
1 parent c290fc8 commit 58fd76b

File tree

1 file changed

+8
-43
lines changed

1 file changed

+8
-43
lines changed

extruct/rdfa.py

Lines changed: 8 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -23,28 +23,15 @@
2323

2424
initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns.update({
2525
"twitter": "https://dev.twitter.com/cards#",
26-
"fb": "http://ogp.me/ns/fb#"
26+
"fb": "http://ogp.me/ns/fb#",
27+
'og': 'http://ogp.me/ns#',
28+
'music': 'http://ogp.me/ns/music#',
29+
'video': 'http://ogp.me/ns/video#',
30+
'article': 'http://ogp.me/ns/article#',
31+
'book': 'http://ogp.me/ns/book#',
32+
'profile': 'http://ogp.me/ns/profile#'
2733
})
2834

29-
_OG_NAMESPACES = {
30-
'og': 'http://ogp.me/ns#',
31-
'music': 'http://ogp.me/ns/music#',
32-
'video': 'http://ogp.me/ns/video#',
33-
'article': 'http://ogp.me/ns/article#',
34-
'book': 'http://ogp.me/ns/book#',
35-
'profile': 'http://ogp.me/ns/profile#'
36-
}
37-
38-
_OG_NAMESPACES_TAGS = {
39-
'og': 'xmlns:og',
40-
'music': 'xmlns:music',
41-
'video': 'xmlns:video',
42-
'article': 'xmlns:article',
43-
'book': 'xmlns:book',
44-
'profile': 'xmlns:profile'
45-
}
46-
47-
4835
class RDFaExtractor(object):
4936

5037
def extract(self, htmlstring, base_url=None, encoding="UTF-8",
@@ -61,28 +48,6 @@ def extract_items(self, document, base_url=None, expanded=True):
6148
vocab_cache_report=False,
6249
refresh_vocab_cache=False,
6350
check_lite=False)
64-
document = self.expandedOGSupport(document)
6551
g = PyRdfa(options, base=base_url).graph_from_DOM(document, graph=Graph(), pgraph=Graph())
6652
jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
67-
return json.loads(jsonld_string)
68-
69-
def expandedOGSupport(self,document):
70-
prefixDic = {}
71-
for head in document.xpath('//head'):
72-
for el in head.xpath('meta[@property and @content]'):
73-
prop = el.attrib['property']
74-
ns = prop.partition(':')[0]
75-
if ns in _OG_NAMESPACES.keys():
76-
prefixDic[_OG_NAMESPACES_TAGS[ns]] = _OG_NAMESPACES[ns]
77-
78-
html_element = None
79-
for element in document.iter():
80-
if element.tag == 'html':
81-
html_element = element
82-
break
83-
84-
if html_element is not None:
85-
for k in prefixDic.keys():
86-
if not (html_element.get(k)):
87-
html_element.set(k,prefixDic[k])
88-
return document
53+
return json.loads(jsonld_string)

0 commit comments

Comments
 (0)