Skip to content

Commit a19781e

Browse files
fix name mapping and schema
1 parent fdec322 commit a19781e

File tree

4 files changed

+419
-477
lines changed

4 files changed

+419
-477
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,16 +117,6 @@ class AvroReader::Impl {
117117
// Create a new schema with the updated root node
118118
auto new_schema = ::avro::ValidSchema(new_root_node);
119119

120-
// Verify that all fields now have IDs after applying the name mapping
121-
HasIdVisitor verify_visitor;
122-
ICEBERG_RETURN_UNEXPECTED(verify_visitor.Visit(new_schema));
123-
if (!verify_visitor.AllHaveIds()) {
124-
// TODO(liuxiaoyu): Print detailed error message with missing field IDs
125-
// information in future
126-
return InvalidSchema(
127-
"Not all fields have field IDs after applying name mapping.");
128-
}
129-
130120
// Update the file schema to use the new schema with field IDs
131121
file_schema = new_schema;
132122
} else {

src/iceberg/avro/avro_schema_util.cc

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -794,11 +794,13 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
794794

795795
for (size_t i = 0; i < original_node->leaves(); ++i) {
796796
if (i >= original_node->names()) {
797-
return InvalidSchema(...);
797+
return InvalidSchema("Index {} is out of bounds for names (size: {})", i,
798+
original_node->names());
798799
}
799800
const std::string& field_name = original_node->nameAt(i);
800801
if (i >= original_node->leaves()) {
801-
return InvalidSchema(...);
802+
return InvalidSchema("Index {} is out of bounds for leaves (size: {})", i,
803+
original_node->leaves());
802804
}
803805
::avro::NodePtr field_node = original_node->leafAt(i);
804806

@@ -907,42 +909,29 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
907909

908910
auto new_map_node = std::make_shared<::avro::NodeMap>();
909911

910-
// Try to find key and value fields from nested mapping
911-
const MappedField* key_field = nullptr;
912-
const MappedField* value_field = nullptr;
913-
if (field.nested_mapping) {
914-
auto fields_span = field.nested_mapping->fields();
915-
for (const auto& f : fields_span) {
916-
if (f.names.find(std::string(kKey)) != f.names.end()) {
917-
key_field = &f;
918-
} else if (f.names.find(std::string(kValue)) != f.names.end()) {
919-
value_field = &f;
920-
}
921-
}
922-
}
912+
// For map types, we use fixed field IDs for key and value
913+
// Key field gets field ID 0, value field gets field ID 1
914+
constexpr int32_t kMapKeyFieldId = 0;
915+
constexpr int32_t kMapValueFieldId = 1;
923916

924-
// Check if both key and value fields are found
925-
if (!key_field) {
926-
return InvalidSchema("Key field not found in nested mapping for map");
927-
}
928-
if (!value_field) {
929-
return InvalidSchema("Value field not found in nested mapping for map");
930-
}
917+
// Create key field with fixed field ID
918+
MappedField key_field;
919+
key_field.field_id = kMapKeyFieldId;
920+
key_field.nested_mapping =
921+
field.nested_mapping; // Pass through nested mapping for complex key types
931922

932-
// Check if field_ids are present
933-
if (!key_field->field_id.has_value()) {
934-
return InvalidSchema("Field ID is missing for key field in map");
935-
}
936-
if (!value_field->field_id.has_value()) {
937-
return InvalidSchema("Field ID is missing for value field in map");
938-
}
923+
// Create value field with fixed field ID
924+
MappedField value_field;
925+
value_field.field_id = kMapValueFieldId;
926+
value_field.nested_mapping =
927+
field.nested_mapping; // Pass through nested mapping for complex value types
939928

940929
// Add key and value nodes
941-
ICEBERG_ASSIGN_OR_RAISE(auto new_key_node, CreateAvroNodeWithFieldIds(
942-
original_node->leafAt(0), *key_field));
930+
ICEBERG_ASSIGN_OR_RAISE(
931+
auto new_key_node, CreateAvroNodeWithFieldIds(original_node->leafAt(0), key_field));
943932
ICEBERG_ASSIGN_OR_RAISE(
944933
auto new_value_node,
945-
CreateAvroNodeWithFieldIds(original_node->leafAt(1), *value_field));
934+
CreateAvroNodeWithFieldIds(original_node->leafAt(1), value_field));
946935
new_map_node->addLeaf(new_key_node);
947936
new_map_node->addLeaf(new_value_node);
948937

0 commit comments

Comments
 (0)