Skip to content

Commit f6ee940

Browse files
authored
Merge pull request #121 from scrapinghub/skip_empty_on_flattening_og
OpenGraph skipping empty for properties with values on flattening
2 parents 6df8e19 + 5fa98cb commit f6ee940

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

extruct/uniform.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,13 @@
44
def _uopengraph(extracted):
55
out = []
66
for obj in extracted:
7-
flattened = dict(reversed(obj['properties']))
7+
# In order of appearance in the page
8+
properties = list(reversed(obj['properties']))
9+
# Ensure that an empty value is never returned when a duplicated
10+
# property also has a non-empty value
11+
non_empty_props = {k for k, v in properties if v and v.strip()}
12+
flattened = {k: v for k, v in properties
13+
if k not in non_empty_props or (v and v.strip())}
814
t = flattened.pop('og:type', None)
915
if t:
1016
flattened['@type'] = t

tests/test_uniform.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,28 @@ def test_uopengraph_duplicated_priorities(self):
3737
for k in range(5):
3838
assert data[0]['prop_{}'.format(k)] == 'value_0'
3939

40+
# Ensures that an empty value is not returned if a property has any
41+
# non-empty value
42+
data = _uopengraph([{'properties':
43+
[('prop_empty', ' '),
44+
45+
('prop_non_empty', ' '),
46+
('prop_non_empty', 'value!'),
47+
48+
('prop_non_empty2', 'value!'),
49+
('prop_non_empty2', ' '),
50+
51+
('prop_non_empty3', ' '),
52+
('prop_non_empty3', 'value!'),
53+
('prop_non_empty3', 'other value'),
54+
],
55+
'namespace': 'namespace'}])
56+
assert data[0]['prop_empty'] == ' '
57+
assert data[0]['prop_non_empty'] == 'value!'
58+
assert data[0]['prop_non_empty2'] == 'value!'
59+
assert data[0]['prop_non_empty3'] == 'value!'
60+
61+
4062
def test_umicroformat(self):
4163
expected = [ { '@context': 'http://microformats.org/wiki/',
4264
'@type': ['h-hidden-phone', 'h-hidden-tablet'],

0 commit comments

Comments
 (0)