Skip to content

Commit f66c825

Browse files
authored
Merge pull request #140 from ShivinDass/issue31
Support for expanded opengraph metadata
2 parents 0648f1a + d9f7c8e commit f66c825

File tree

4 files changed

+159
-3
lines changed

4 files changed

+159
-3
lines changed

extruct/rdfa.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@
2323

2424
initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns.update({
2525
"twitter": "https://dev.twitter.com/cards#",
26-
"fb": "http://ogp.me/ns/fb#"
26+
"fb": "http://ogp.me/ns/fb#",
27+
"og": "http://ogp.me/ns#",
28+
"music": "http://ogp.me/ns/music#",
29+
"video": "http://ogp.me/ns/video#",
30+
"article": "http://ogp.me/ns/article#",
31+
"book": "http://ogp.me/ns/book#",
32+
"profile": "http://ogp.me/ns/profile#"
2733
})
2834

2935

@@ -43,7 +49,6 @@ def extract_items(self, document, base_url=None, expanded=True):
4349
vocab_cache_report=False,
4450
refresh_vocab_cache=False,
4551
check_lite=False)
46-
4752
g = PyRdfa(options, base=base_url).graph_from_DOM(document, graph=Graph(), pgraph=Graph())
4853
jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
49-
return json.loads(jsonld_string)
54+
return json.loads(jsonld_string)
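misc/expanded_OG_support_test.html (new test fixture, loaded via get_testdata('misc', ...); full repository path not shown on this page)

Lines changed: 40 additions & 0 deletions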
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2+
<html xmlns="https://www.w3.org/1999/xhtml" xmlns:og="https://ogp.me/ns#" xmlns:fb="https://www.facebook.com/2008/fbml">
3+
<head>
4+
<title>Himanshu's Open Graph Protocol</title>
5+
<meta http-equiv="Content-Type" content="text/html;charset=WINDOWS-1252" />
6+
<meta http-equiv="Content-Language" content="en-us" />
7+
<link rel="stylesheet" type="text/css" href="event-education.css" />
8+
<meta name="verify-v1" content="so4y/3aLT7/7bUUB9f6iVXN0tv8upRwaccek7JKB1gs=" >
9+
<meta property="og:image" content="https://www.eventeducation.com/images/982336_wedding_dayandouan_th.jpg"/>
10+
<meta property="fb:admins" content="himanshu160"/>
11+
<meta property="og:site_name" content="Event Education"/>
12+
13+
<meta property="og:url" content="http://www.nytimes.com/2016/12/15/arts/music/from-steet-theater-to-wagner-on-the-opera-stage.html" />
14+
<meta property="og:type" content="article" />
15+
<meta property="og:title" content="From Street Theater to Wagner on the Opera Stage" />
16+
<meta property="og:description" content="which he set in Bangladesh instead of Norway. The production opens in Madrid on Saturday." />
17+
<meta property="article:published" itemprop="datePublished" content="2016-12-15T05:55:55-05:00" />
18+
<meta property="article:modified" itemprop="dateModified" content="2016-12-15T06:19:30-05:00" />
19+
<meta property="article:section" itemprop="articleSection" content="Music" />
20+
<meta property="article:section-taxonomy-id" itemprop="articleSection" content="C5BFA7D5-359C-427B-90E6-6B7245A6CDD8" />
21+
<meta property="article:section_url" content="http://www.nytimes.com/section/arts" />
22+
<meta property="article:top-level-section" content="arts" />
23+
<meta property="fb:app_id" content="9869919170" />
24+
<meta property="music:duration" content="60" />
25+
<meta property="video:tag" content="Exhilerating" />
26+
<meta property="book:release_date" content="2016-12-15T06:19:30-05:00" />
27+
<meta property="profile:first_name" content="John" />
28+
<meta property="profile:last_name" content="Lennon" />
29+
</head>
30+
<body>
31+
<div id="fb-root"></div>
32+
<script>(function(d, s, id) {
33+
var js, fjs = d.getElementsByTagName(s)[0];
34+
if (d.getElementById(id)) return;
35+
js = d.createElement(s); js.id = id;
36+
js.src = "//connect.facebook.net/en_US/all.js#xfbml=1&appId=501839739845103";
37+
fjs.parentNode.insertBefore(js, fjs);
38+
}(document, 'script', 'facebook-jssdk'));</script>
39+
</body>
40+
</html>
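misc/expanded_OG_support_test.json (new expected-output fixture, loaded via get_testdata('misc', ...); full repository path not shown on this page)

Lines changed: 100 additions & 0 deletions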
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
[
2+
{
3+
"https://ogp.me/ns#url": [
4+
{
5+
"@value": "http://www.nytimes.com/2016/12/15/arts/music/from-steet-theater-to-wagner-on-the-opera-stage.html"
6+
}
7+
],
8+
"http://ogp.me/ns/profile#first_name": [
9+
{
10+
"@value": "John"
11+
}
12+
],
13+
"https://ogp.me/ns#type": [
14+
{
15+
"@value": "article"
16+
}
17+
],
18+
"http://ogp.me/ns/article#section": [
19+
{
20+
"@value": "Music"
21+
}
22+
],
23+
"http://ogp.me/ns/music#duration": [
24+
{
25+
"@value": "60"
26+
}
27+
],
28+
"http://ogp.me/ns/article#modified": [
29+
{
30+
"@value": "2016-12-15T06:19:30-05:00"
31+
}
32+
],
33+
"http://ogp.me/ns/video#tag": [
34+
{
35+
"@value": "Exhilerating"
36+
}
37+
],
38+
"https://ogp.me/ns#site_name": [
39+
{
40+
"@value": "Event Education"
41+
}
42+
],
43+
"http://ogp.me/ns/profile#last_name": [
44+
{
45+
"@value": "Lennon"
46+
}
47+
],
48+
"https://www.facebook.com/2008/fbmladmins": [
49+
{
50+
"@value": "himanshu160"
51+
}
52+
],
53+
"http://ogp.me/ns/article#section_url": [
54+
{
55+
"@value": "http://www.nytimes.com/section/arts"
56+
}
57+
],
58+
"https://ogp.me/ns#title": [
59+
{
60+
"@value": "From Street Theater to Wagner on the Opera Stage"
61+
}
62+
],
63+
"https://www.facebook.com/2008/fbmlapp_id": [
64+
{
65+
"@value": "9869919170"
66+
}
67+
],
68+
"https://ogp.me/ns#image": [
69+
{
70+
"@value": "https://www.eventeducation.com/images/982336_wedding_dayandouan_th.jpg"
71+
}
72+
],
73+
"http://ogp.me/ns/book#release_date": [
74+
{
75+
"@value": "2016-12-15T06:19:30-05:00"
76+
}
77+
],
78+
"http://ogp.me/ns/article#section-taxonomy-id": [
79+
{
80+
"@value": "C5BFA7D5-359C-427B-90E6-6B7245A6CDD8"
81+
}
82+
],
83+
"http://ogp.me/ns/article#published": [
84+
{
85+
"@value": "2016-12-15T05:55:55-05:00"
86+
}
87+
],
88+
"https://ogp.me/ns#description": [
89+
{
90+
"@value": "which he set in Bangladesh instead of Norway. The production opens in Madrid on Saturday."
91+
}
92+
],
93+
"@id": "http://www.example.com/index.html",
94+
"http://ogp.me/ns/article#top-level-section": [
95+
{
96+
"@value": "arts"
97+
}
98+
]
99+
}
100+
]

tests/test_rdfa.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,14 @@ def test_wikipedia_xhtml_rdfa_no_prefix(self):
100100
data = rdfae.extract(body, base_url='http://nielslubberman.nl/drupal/')
101101

102102
self.assertJsonLDEqual(data, expected)
103+
104+
def test_expanded_opengraph_support(self):
105+
body = get_testdata('misc','expanded_OG_support_test.html')
106+
expected = json.loads(
107+
get_testdata('misc','expanded_OG_support_test.json'
108+
).decode('UTF-8'))
109+
110+
rdfae = RDFaExtractor()
111+
data = rdfae.extract(body, base_url='http://www.example.com/index.html')
112+
113+
self.assertJsonLDEqual(data,expected)

0 commit comments

Comments
 (0)