From 8702405b7090d9234b185e154193e5b98aa1910f Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Sat, 25 Jan 2025 18:30:38 -0800 Subject: [PATCH 1/2] added n3 test to check for internal float normalization made as a separate commit to illustrate the old broken behavior prior to the fix in the next commit --- test/test_n3.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/test_n3.py b/test/test_n3.py index f3d7eeb07..fe25f6014 100644 --- a/test/test_n3.py +++ b/test/test_n3.py @@ -251,6 +251,32 @@ def test_empty_prefix(self): g2 ), "Document with declared empty prefix must match default #" + def test_float_no_norm(self): + import rdflib + _ps = rdflib.NORMALIZE_LITERALS + try: + bads = [] + for norm_lit in (True, False): + rdflib.NORMALIZE_LITERALS = norm_lit + g1 = Graph() + g1.parse(data=":a :b 1e10, 1e0 .", format="n3") + strep = [str(o) for o in g1.objects()] + if norm_lit: + if '1e10' not in strep and '1e0' not in strep: + pass + else: + bads.append(('NOT normalized when should have been', strep)) + else: + if '1e10' in strep and '1e0' in strep: + pass + else: + bads.append(('normalized when it should NOT have been', strep)) + + finally: + rdflib.NORMALIZE_LITERALS = _ps + + assert not bads, bads + class TestRegularExpressions: def test_exponents(self): From 66f626c169f7972c212797251256b85a2d73441b Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Sat, 25 Jan 2025 18:39:27 -0800 Subject: [PATCH 2/2] notation3.py: don't normalize float representation fix behavior of the n3 parser family to avoid normalizing raw float string representation which makes it impossible to roundtrip the exact original string representation of e.g. 
1e10 --- rdflib/plugins/parsers/notation3.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index acc56215b..7dd87d50a 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -376,6 +376,10 @@ def unicodeExpand(m: Match) -> str: langcode = re.compile(r"[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*") +class sfloat(str): + """ don't normalize raw XSD.double string representation """ + + class SinkParser: def __init__( self, @@ -1522,7 +1526,7 @@ def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: m = exponent_syntax.match(argstr, i) if m: j = m.end() - res.append(float(argstr[i:j])) + res.append(sfloat(argstr[i:j])) return j m = decimal_syntax.match(argstr, i) @@ -1913,7 +1917,7 @@ def normalise(self, f: Formula | Graph | None, n: int) -> Literal: ... def normalise(self, f: Formula | Graph | None, n: Decimal) -> Literal: ... @overload - def normalise(self, f: Formula | Graph | None, n: float) -> Literal: ... + def normalise(self, f: Formula | Graph | None, n: sfloat) -> Literal: ... @overload def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ... @@ -1921,7 +1925,7 @@ def normalise(self, f: Formula | Graph | None, n: Node) -> Node: ... def normalise( self, f: Formula | Graph | None, - n: Union[tuple[int, str], bool, int, Decimal, float, Node, _AnyT], + n: Union[tuple[int, str], bool, int, Decimal, sfloat, Node, _AnyT], ) -> Union[URIRef, Literal, BNode, Node, _AnyT]: if isinstance(n, tuple): return URIRef(str(n[1])) @@ -1941,7 +1945,7 @@ def normalise( s = Literal(value, datatype=DECIMAL_DATATYPE) return s - if isinstance(n, float): + if isinstance(n, sfloat): s = Literal(str(n), datatype=DOUBLE_DATATYPE) return s