Skip to content

Commit 97a1c66

Browse files
authored
Fix: bnode and variable validation (#306)
1 parent a4d938a commit 97a1c66

File tree

5 files changed

+32
-22
lines changed

5 files changed

+32
-22
lines changed

src/rdf4cpp/BlankNode.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ std::ostream &operator<<(std::ostream &os, BlankNode const &bnode) {
9191

9292
void BlankNode::validate(std::string_view v) {
9393
using namespace util::char_matcher_detail;
94-
static constexpr auto first_matcher = ASCIINumMatcher{} | PNCharsBaseMatcher;
94+
static constexpr auto first_matcher = ASCIINumMatcher{} | PNCharsUMatcher;
9595
auto r = v | una::views::utf8;
9696
auto it = r.begin();
9797
if (it == r.end()) {
@@ -102,7 +102,7 @@ void BlankNode::validate(std::string_view v) {
102102
}
103103
auto lastchar = *it;
104104
++it;
105-
static constexpr auto pn_matcher = PNCharsMatcher | ASCIIPatternMatcher(".-");
105+
static constexpr auto pn_matcher = PNCharsMatcher | ASCIIPatternMatcher{"."};
106106
while (it != r.end()) {
107107
if (!pn_matcher.match(*it))
108108
{

src/rdf4cpp/query/Variable.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Variable::Variable() noexcept : Node{storage::identifier::NodeBackendHandle{{},
1010
}
1111

1212
Variable::Variable(std::string_view name, bool anonymous, storage::DynNodeStoragePtr node_storage)
13-
: Variable{make_unchecked((validate(name), name), anonymous, node_storage)} {}
13+
: Variable{make_unchecked((validate(name, anonymous), name), anonymous, node_storage)} {}
1414

1515
Variable::Variable(storage::identifier::NodeBackendHandle handle) noexcept : Node{handle} {}
1616

@@ -108,9 +108,13 @@ std::ostream &operator<<(std::ostream &os, Variable const &variable) {
108108
return os;
109109
}
110110

111-
void Variable::validate(std::string_view n) {
111+
void Variable::validate(std::string_view n, bool anonymous) {
112+
if (anonymous) {
113+
return BlankNode::validate(n);
114+
}
115+
112116
using namespace util::char_matcher_detail;
113-
static constexpr auto first_matcher = ASCIINumMatcher{} | PNCharsBaseMatcher;
117+
static constexpr auto first_matcher = ASCIINumMatcher{} | PNCharsUMatcher;
114118
auto r = n | una::views::utf8;
115119
auto it = r.begin();
116120
if (it == r.end()) {
@@ -120,9 +124,10 @@ void Variable::validate(std::string_view n) {
120124
throw InvalidNode(std::format("invalid blank node label {}", n));
121125
}
122126
++it;
127+
128+
static constexpr auto matcher = ASCIINumMatcher{} | PNCharsUMatcher | PNChars_UnicodePartMatcher{};
123129
while (it != r.end()) {
124-
if (!PNCharsMatcher.match(*it))
125-
{
130+
if (!matcher.match(*it)) {
126131
throw InvalidNode(std::format("invalid blank node label {}", n));
127132
}
128133
++it;

src/rdf4cpp/query/Variable.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,10 @@ struct Variable : Node {
4646
/**
4747
* Validates that the given name is a valid Variable name
4848
* @param var_name name to check
49+
* @param anonymous if the variable is supposed to be anonymous
4950
* @throws InvalidNode if the variable name is not valid
5051
*/
51-
static void validate(std::string_view var_name);
52+
static void validate(std::string_view var_name, bool anonymous = false);
5253

5354
[[nodiscard]] bool is_anonymous() const;
5455

src/rdf4cpp/util/CharMatcher.hpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,11 @@ struct IPrivateMatcher {
264264
};
265265

266266
/**
267-
* matches PN_CHARS_BASE of the Turtle/SPARQL specification, without ASCII alpha (or with ASCIIAlphaMatcher).
267+
* Matches the unicode part (the characters listed as numbers) of PN_CHARS_BASE of the Turtle/SPARQL specification
268268
*/
269-
struct PNCharsBase_UniMatcher {
269+
struct PNCharsBase_UnicodePartMatcher {
270270
[[nodiscard]] static constexpr bool match(int c) noexcept {
271-
return c == '_' ||
272-
(c >= 0xC0 && c <= 0xD6) ||
271+
return (c >= 0xC0 && c <= 0xD6) ||
273272
(c >= 0xD8 && c <= 0xF6) ||
274273
(c >= 0xF8 && c <= 0x02FF) ||
275274
(c >= 0x0370 && c <= 0x037D) ||
@@ -296,12 +295,17 @@ struct PNCharsBase_UniMatcher {
296295
/**
297296
* matches PN_CHARS_BASE of the Turtle/SPARQL specification
298297
*/
299-
constexpr auto PNCharsBaseMatcher = ASCIIAlphaMatcher{} | PNCharsBase_UniMatcher{};
298+
constexpr auto PNCharsBaseMatcher = ASCIIAlphaMatcher{} | PNCharsBase_UnicodePartMatcher{};
300299

301300
/**
302-
* matches PN_CHARS of the Turtle/SPARQL specification, without ASCII num and PN_CHARS_BASE.
301+
* matches PN_CHARS_U of the Turtle/SPARQL specification
303302
*/
304-
struct PNChars_UniMatcher {
303+
constexpr auto PNCharsUMatcher = ASCIIPatternMatcher{"_"} | PNCharsBaseMatcher;
304+
305+
/**
306+
* Matches the unicode part (the characters listed as numbers) of PN_CHARS of the Turtle/SPARQL specification
307+
*/
308+
struct PNChars_UnicodePartMatcher {
305309
[[nodiscard]] static constexpr bool match(int c) noexcept {
306310
return c == 0xB7 ||
307311
(c >= 0x0300 && c <= 0x036F) ||
@@ -321,7 +325,7 @@ struct PNChars_UniMatcher {
321325
/**
322326
* matches PN_CHARS of the Turtle/SPARQL specification.
323327
*/
324-
constexpr auto PNCharsMatcher = ASCIINumMatcher{} | PNCharsBaseMatcher | PNChars_UniMatcher{};
328+
constexpr auto PNCharsMatcher = ASCIINumMatcher{} | ASCIIPatternMatcher{"-"} | PNCharsUMatcher | PNChars_UnicodePartMatcher{};
325329

326330
/**
327331
* iterates over s and tries to match all in m.

tests/query/tests_Variable.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,13 @@ TEST_CASE("Variable::find") {
6161
}
6262

6363
TEST_CASE("Variable validity") {
64-
query::Variable n{};
65-
CHECK_THROWS_AS(n = query::Variable::make_named("\U00000312_not_first"), InvalidNode);
66-
CHECK_THROWS_AS(n = query::Variable::make_anonymous("no-"), InvalidNode);
67-
CHECK_THROWS_AS(n = query::Variable::make_anonymous("-no"), InvalidNode);
68-
CHECK_THROWS_AS(n = query::Variable("may_not_contain."), InvalidNode);
64+
CHECK_THROWS_AS((void) query::Variable::make_named("\U00000312_not_first"), InvalidNode);
65+
CHECK_THROWS_AS((void) query::Variable::make_named("no-"), InvalidNode);
66+
CHECK(query::Variable::make_anonymous("no-").name() == "no-");
67+
CHECK_THROWS_AS((void) query::Variable::make_named("-no"), InvalidNode);
68+
CHECK_THROWS_AS((void) query::Variable::make_anonymous("-no"), InvalidNode);
69+
CHECK_THROWS_AS((void) query::Variable("may_not_contain."), InvalidNode);
6970
CHECK(query::Variable::make_unchecked("may_not_contain.").name() == "may_not_contain.");
7071
CHECK(query::Variable::make_named("012_numbers_too567").name() == "012_numbers_too567");
7172
CHECK(query::Variable::make_named("\U0001f34cthrow_some_unicode_at_it\U0001f34c").name() == "\U0001f34cthrow_some_unicode_at_it\U0001f34c");
72-
CHECK(n == query::Variable{});
7373
}

0 commit comments

Comments
 (0)