Skip to content

Commit 26ac800

Browse files
Improve error messages (#57)
* Accept NULL values in CSVs with schemas
* Report filename and line number in exceptions
1 parent b6b32a4 commit 26ac800

File tree

4 files changed: 20 additions (+20) and 7 deletions (-7).

redisgraph_bulk_loader/entity_file.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ def typed_prop_to_binary(prop_val, prop_type):
6565
# Remove leading and trailing whitespace
6666
prop_val = prop_val.strip()
6767

68+
if prop_val == "":
69+
# An empty string indicates a NULL property.
70+
# TODO This is not allowed in Cypher, consider how to handle it here rather than in-module.
71+
return struct.pack(format_str, 0)
72+
6873
# TODO allow ID type specification
6974
if prop_type == Type.LONG:
7075
try:
@@ -107,7 +112,7 @@ def typed_prop_to_binary(prop_val, prop_type):
107112
return array_prop_to_binary(format_str, prop_val)
108113

109114
# If it hasn't returned by this point, it is trying to set it to a type that it can't adopt
110-
raise Exception("unable to parse [" + prop_val + "] with type ["+repr(prop_type)+"]")
115+
raise SchemaError("unable to parse [" + prop_val + "] with type ["+repr(prop_type)+"]")
111116

112117

113118
# Convert a single CSV property field with an inferred type into a binary stream.
@@ -227,14 +232,14 @@ def convert_header_with_schema(self, header):
227232
# Multiple colons found in column name, emit error.
228233
# TODO might need to check for backtick escapes
229234
if len(pair) > 2:
230-
raise CSVError("Field '%s' had %d colons" % field, len(field))
235+
raise CSVError("%s: Field '%s' had %d colons" % (self.infile.name, field, len(field)))
231236

232237
# Convert the column type.
233238
col_type = convert_schema_type(pair[1].upper().strip())
234239

235240
# If the column did not have a name but the type requires one, emit an error.
236241
if len(pair[0]) == 0 and col_type not in (Type.ID, Type.START_ID, Type.END_ID, Type.IGNORE):
237-
raise SchemaError("Each property in the header should be a colon-separated pair")
242+
raise SchemaError("%s: Each property in the header should be a colon-separated pair" % (self.infile.name))
238243
else:
239244
# We have a column name and a type.
240245
# Only store the name if the column's values should be added as properties.

redisgraph_bulk_loader/label.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@ def process_entities(self):
6363
id_field = self.id_namespace + '.' + str(id_field)
6464
self.update_node_dictionary(id_field)
6565

66-
row_binary = self.pack_props(row)
66+
try:
67+
row_binary = self.pack_props(row)
68+
except SchemaError as e:
69+
# TODO why is line_num off by one?
70+
raise SchemaError("%s:%d %s" % (self.infile.name, self.reader.line_num - 1, str(e)))
6771
row_binary_len = len(row_binary)
6872
# If the addition of this entity will make the binary token grow too large,
6973
# send the buffer now.

redisgraph_bulk_loader/relation_type.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,16 @@ def process_entities(self):
6161
src = self.query_buffer.nodes[start_id]
6262
dest = self.query_buffer.nodes[end_id]
6363
except KeyError as e:
64-
print("Relationship specified a non-existent identifier. src: %s; dest: %s" % (row[self.start_id], row[self.end_id]))
64+
print("%s:%d Relationship specified a non-existent identifier. src: %s; dest: %s" %
65+
(self.infile.name, self.reader.line_num - 1, row[self.start_id], row[self.end_id]))
6566
if self.config.skip_invalid_edges is False:
6667
raise e
6768
continue
6869
fmt = "=QQ" # 8-byte unsigned ints for src and dest
69-
row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
70+
try:
71+
row_binary = struct.pack(fmt, src, dest) + self.pack_props(row)
72+
except SchemaError as e:
73+
raise SchemaError("%s:%d %s" % (self.infile.name, self.reader.line_num, str(e)))
7074
row_binary_len = len(row_binary)
7175
# If the addition of this entity will make the binary token grow too large,
7276
# send the buffer now.

test/test_bulk_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ def test16_error_on_schema_failure(self):
660660
except Exception as e:
661661
# Verify that the correct exception is raised.
662662
self.assertEqual(sys.exc_info()[0].__name__, 'SchemaError')
663-
self.assertIn("Could not parse 'strval' as an array", e.args)
663+
self.assertIn("Could not parse 'strval' as an array", str(e))
664664

665665
def test17_ensure_index_is_created(self):
666666
graphname = "index_test"

0 commit comments

Comments
 (0)