23
23
24
24
initial_context ["http://www.w3.org/2011/rdfa-context/rdfa-1.1" ].ns .update ({
25
25
"twitter" : "https://dev.twitter.com/cards#" ,
26
- "fb" : "http://ogp.me/ns/fb#"
26
+ "fb" : "http://ogp.me/ns/fb#" ,
27
+ 'og' : 'http://ogp.me/ns#' ,
28
+ 'music' : 'http://ogp.me/ns/music#' ,
29
+ 'video' : 'http://ogp.me/ns/video#' ,
30
+ 'article' : 'http://ogp.me/ns/article#' ,
31
+ 'book' : 'http://ogp.me/ns/book#' ,
32
+ 'profile' : 'http://ogp.me/ns/profile#'
27
33
})
28
34
29
- _OG_NAMESPACES = {
30
- 'og' : 'http://ogp.me/ns#' ,
31
- 'music' : 'http://ogp.me/ns/music#' ,
32
- 'video' : 'http://ogp.me/ns/video#' ,
33
- 'article' : 'http://ogp.me/ns/article#' ,
34
- 'book' : 'http://ogp.me/ns/book#' ,
35
- 'profile' : 'http://ogp.me/ns/profile#'
36
- }
37
-
38
- _OG_NAMESPACES_TAGS = {
39
- 'og' : 'xmlns:og' ,
40
- 'music' : 'xmlns:music' ,
41
- 'video' : 'xmlns:video' ,
42
- 'article' : 'xmlns:article' ,
43
- 'book' : 'xmlns:book' ,
44
- 'profile' : 'xmlns:profile'
45
- }
46
-
47
-
48
35
class RDFaExtractor (object ):
49
36
50
37
def extract (self , htmlstring , base_url = None , encoding = "UTF-8" ,
@@ -61,28 +48,6 @@ def extract_items(self, document, base_url=None, expanded=True):
61
48
vocab_cache_report = False ,
62
49
refresh_vocab_cache = False ,
63
50
check_lite = False )
64
- document = self .expandedOGSupport (document )
65
51
g = PyRdfa (options , base = base_url ).graph_from_DOM (document , graph = Graph (), pgraph = Graph ())
66
52
jsonld_string = g .serialize (format = 'json-ld' , auto_compact = not expanded ).decode ('utf-8' )
67
- return json .loads (jsonld_string )
68
-
69
- def expandedOGSupport (self ,document ):
70
- prefixDic = {}
71
- for head in document .xpath ('//head' ):
72
- for el in head .xpath ('meta[@property and @content]' ):
73
- prop = el .attrib ['property' ]
74
- ns = prop .partition (':' )[0 ]
75
- if ns in _OG_NAMESPACES .keys ():
76
- prefixDic [_OG_NAMESPACES_TAGS [ns ]] = _OG_NAMESPACES [ns ]
77
-
78
- html_element = None
79
- for element in document .iter ():
80
- if element .tag == 'html' :
81
- html_element = element
82
- break
83
-
84
- if html_element is not None :
85
- for k in prefixDic .keys ():
86
- if not (html_element .get (k )):
87
- html_element .set (k ,prefixDic [k ])
88
- return document
53
+ return json .loads (jsonld_string )
0 commit comments