From 9c6e8c4c4c5957fe84ff503e7b55f99339f5db4a Mon Sep 17 00:00:00 2001 From: ivan Date: Fri, 26 Jul 2019 13:48:26 +0200 Subject: [PATCH 1/2] OpenGraph skipping empty for properties with values on flattening --- extruct/uniform.py | 8 +++++++- tests/test_uniform.py | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/extruct/uniform.py b/extruct/uniform.py index 3d7ffd5c..ead34252 100644 --- a/extruct/uniform.py +++ b/extruct/uniform.py @@ -4,7 +4,13 @@ def _uopengraph(extracted): out = [] for obj in extracted: - flattened = dict(reversed(obj['properties'])) + # In order of appearance in the page + properties = list(reversed(obj['properties'])) + # Ensuring that never empty value is returned if there is a duplicated + # property with non empty value + non_empty_props = {k for k, v in properties if v and v.strip()} + flattened = dict((k, v) for k, v in properties + if k not in non_empty_props or (v and v.strip())) t = flattened.pop('og:type', None) if t: flattened['@type'] = t diff --git a/tests/test_uniform.py b/tests/test_uniform.py index e0e3d76b..6859fb27 100644 --- a/tests/test_uniform.py +++ b/tests/test_uniform.py @@ -37,6 +37,28 @@ def test_uopengraph_duplicated_priorities(self): for k in range(5): assert data[0]['prop_{}'.format(k)] == 'value_0' + # Ensures that empty is not returned if a property contains any + # non empty value + data = _uopengraph([{'properties': + [('prop_empty', ' '), + + ('prop_non_empty', ' '), + ('prop_non_empty', 'value!'), + + ('prop_non_empty2', 'value!'), + ('prop_non_empty2', ' '), + + ('prop_non_empty3', ' '), + ('prop_non_empty3', 'value!'), + ('prop_non_empty3', 'other value'), + ], + 'namespace': 'namespace'}]) + assert data[0]['prop_empty'] == ' ' + assert data[0]['prop_non_empty'] == 'value!' + assert data[0]['prop_non_empty2'] == 'value!' + assert data[0]['prop_non_empty3'] == 'value!' + + def test_umicroformat(self): expected = [ { '@context': 'http://microformats.org/wiki/', '@type': ['h-hidden-phone', 'h-hidden-tablet'], From 5fa98cbe77844fee1174a5d6659b99c02763e0ea Mon Sep 17 00:00:00 2001 From: ivan Date: Fri, 26 Jul 2019 14:02:15 +0200 Subject: [PATCH 2/2] Little code simplification --- extruct/uniform.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extruct/uniform.py b/extruct/uniform.py index ead34252..5f13a12e 100644 --- a/extruct/uniform.py +++ b/extruct/uniform.py @@ -9,8 +9,8 @@ def _uopengraph(extracted): # Ensuring that never empty value is returned if there is a duplicated # property with non empty value non_empty_props = {k for k, v in properties if v and v.strip()} - flattened = dict((k, v) for k, v in properties - if k not in non_empty_props or (v and v.strip())) + flattened = {k: v for k, v in properties + if k not in non_empty_props or (v and v.strip())} t = flattened.pop('og:type', None) if t: flattened['@type'] = t