@@ -798,10 +798,6 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
798
798
original_node->names ());
799
799
}
800
800
const std::string& field_name = original_node->nameAt (i);
801
- if (i >= original_node->leaves ()) {
802
- return InvalidSchema (" Index {} is out of bounds for leaves (size: {})" , i,
803
- original_node->leaves ());
804
- }
805
801
::avro::NodePtr field_node = original_node->leafAt (i);
806
802
807
803
// TODO(liuxiaoyu): Add support for case sensitivity in name matching.
@@ -831,6 +827,7 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
831
827
const auto & original_attrs = original_node->customAttributesAt (i);
832
828
const auto & existing_attrs = original_attrs.attributes ();
833
829
for (const auto & attr_pair : existing_attrs) {
830
+ // Copy each existing attribute to preserve original metadata
834
831
attributes.addAttribute (attr_pair.first , attr_pair.second , false );
835
832
}
836
833
}
@@ -843,7 +840,7 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
843
840
844
841
// Recursively apply field IDs to nested fields
845
842
ICEBERG_ASSIGN_OR_RAISE (auto new_nested_node,
846
- CreateAvroNodeWithFieldIds (field_node, *nested_field));
843
+ MakeAvroNodeWithFieldIds (field_node, *nested_field));
847
844
new_record_node->addName (field_name);
848
845
new_record_node->addLeaf (new_nested_node);
849
846
} else {
@@ -866,7 +863,7 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
866
863
// Check if this is a map represented as array
867
864
if (HasMapLogicalType (original_node)) {
868
865
ICEBERG_ASSIGN_OR_RAISE (auto new_element_node,
869
- CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), field));
866
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), field));
870
867
new_array_node->addLeaf (new_element_node);
871
868
return new_array_node;
872
869
}
@@ -891,8 +888,14 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
891
888
892
889
ICEBERG_ASSIGN_OR_RAISE (
893
890
auto new_element_node,
894
- CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), *element_field));
891
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), *element_field));
895
892
new_array_node->addLeaf (new_element_node);
893
+
894
+ // Add element field ID as custom attribute
895
+ ::avro::CustomAttributes element_attributes;
896
+ element_attributes.addAttribute (std::string (kFieldIdProp ),
897
+ std::to_string (*element_field->field_id ), false );
898
+ new_array_node->addCustomAttributesForField (element_attributes);
896
899
} else {
897
900
// If no element field found, this is an error
898
901
return InvalidSchema (" Element field not found in nested mapping for array" );
@@ -909,32 +912,77 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
909
912
910
913
auto new_map_node = std::make_shared<::avro::NodeMap>();
911
914
912
- // For map types, we use fixed field IDs for key and value
913
- // Key field gets field ID 0, value field gets field ID 1
914
- constexpr int32_t kMapKeyFieldId = 0 ;
915
- constexpr int32_t kMapValueFieldId = 1 ;
915
+ // For map types, we need to extract key and value field mappings from the nested
916
+ // mapping
917
+ if (!field.nested_mapping ) {
918
+ return InvalidSchema (" Map type requires nested mapping for key and value fields" );
919
+ }
920
+
921
+ // Find key and value field mappings by name
922
+ std::optional<int32_t > key_id = field.nested_mapping ->Id (" key" );
923
+ std::optional<int32_t > value_id = field.nested_mapping ->Id (" value" );
924
+
925
+ if (!key_id || !value_id) {
926
+ return InvalidSchema (" Map type requires both 'key' and 'value' field mappings" );
927
+ }
928
+
929
+ std::optional<MappedFieldConstRef> key_field_ref = field.nested_mapping ->Field (*key_id);
930
+ std::optional<MappedFieldConstRef> value_field_ref =
931
+ field.nested_mapping ->Field (*value_id);
932
+
933
+ if (!key_field_ref || !value_field_ref) {
934
+ return InvalidSchema (" Map type requires both key and value field mappings" );
935
+ }
936
+
937
+ const auto & key_mapped_field = key_field_ref->get ();
938
+ const auto & value_mapped_field = value_field_ref->get ();
916
939
917
- // Create key field with fixed field ID
940
+ if (!key_mapped_field.field_id || !value_mapped_field.field_id ) {
941
+ return InvalidSchema (" Map key and value fields must have field IDs" );
942
+ }
943
+
944
+ // Create key field with mapped field ID
918
945
MappedField key_field;
919
- key_field.field_id = kMapKeyFieldId ;
920
- key_field.nested_mapping =
921
- field.nested_mapping ; // Pass through nested mapping for complex key types
946
+ key_field.field_id = *key_mapped_field.field_id ;
947
+ key_field.nested_mapping = key_mapped_field.nested_mapping ;
922
948
923
- // Create value field with fixed field ID
949
+ // Create value field with mapped field ID
924
950
MappedField value_field;
925
- value_field.field_id = kMapValueFieldId ;
926
- value_field.nested_mapping =
927
- field.nested_mapping ; // Pass through nested mapping for complex value types
951
+ value_field.field_id = *value_mapped_field.field_id ;
952
+ value_field.nested_mapping = value_mapped_field.nested_mapping ;
928
953
929
954
// Add key and value nodes
930
- ICEBERG_ASSIGN_OR_RAISE (
931
- auto new_key_node, CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), key_field));
955
+ ICEBERG_ASSIGN_OR_RAISE (auto new_key_node,
956
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), key_field));
932
957
ICEBERG_ASSIGN_OR_RAISE (
933
958
auto new_value_node,
934
- CreateAvroNodeWithFieldIds (original_node->leafAt (1 ), value_field));
959
+ MakeAvroNodeWithFieldIds (original_node->leafAt (1 ), value_field));
935
960
new_map_node->addLeaf (new_key_node);
936
961
new_map_node->addLeaf (new_value_node);
937
962
963
+ // Preserve existing custom attributes from the original node and add field ID
964
+ // attributes Copy existing attributes from the original node (if any)
965
+ if (original_node->customAttributes () > 0 ) {
966
+ const auto & original_attrs = original_node->customAttributesAt (0 );
967
+ const auto & existing_attrs = original_attrs.attributes ();
968
+ for (const auto & attr_pair : existing_attrs) {
969
+ // Copy each existing attribute to preserve original metadata
970
+ ::avro::CustomAttributes attributes;
971
+ attributes.addAttribute (attr_pair.first , attr_pair.second , false );
972
+ new_map_node->addCustomAttributesForField (attributes);
973
+ }
974
+ }
975
+
976
+ ::avro::CustomAttributes key_attributes;
977
+ key_attributes.addAttribute (std::string (kFieldIdProp ),
978
+ std::to_string (*key_mapped_field.field_id ), false );
979
+ new_map_node->addCustomAttributesForField (key_attributes);
980
+
981
+ ::avro::CustomAttributes value_attributes;
982
+ value_attributes.addAttribute (std::string (kFieldIdProp ),
983
+ std::to_string (*value_mapped_field.field_id ), false );
984
+ new_map_node->addCustomAttributesForField (value_attributes);
985
+
938
986
return new_map_node;
939
987
}
940
988
@@ -952,16 +1000,14 @@ Result<::avro::NodePtr> CreateUnionNodeWithFieldIds(const ::avro::NodePtr& origi
952
1000
953
1001
if (branch_0_is_null && !branch_1_is_null) {
954
1002
// branch_0 is null, branch_1 is not null
955
- ICEBERG_ASSIGN_OR_RAISE (auto new_branch_1,
956
- CreateAvroNodeWithFieldIds (branch_1, field));
1003
+ ICEBERG_ASSIGN_OR_RAISE (auto new_branch_1, MakeAvroNodeWithFieldIds (branch_1, field));
957
1004
auto new_union_node = std::make_shared<::avro::NodeUnion>();
958
1005
new_union_node->addLeaf (branch_0); // null branch
959
1006
new_union_node->addLeaf (new_branch_1);
960
1007
return new_union_node;
961
1008
} else if (!branch_0_is_null && branch_1_is_null) {
962
1009
// branch_0 is not null, branch_1 is null
963
- ICEBERG_ASSIGN_OR_RAISE (auto new_branch_0,
964
- CreateAvroNodeWithFieldIds (branch_0, field));
1010
+ ICEBERG_ASSIGN_OR_RAISE (auto new_branch_0, MakeAvroNodeWithFieldIds (branch_0, field));
965
1011
auto new_union_node = std::make_shared<::avro::NodeUnion>();
966
1012
new_union_node->addLeaf (new_branch_0);
967
1013
new_union_node->addLeaf (branch_1); // null branch
@@ -977,8 +1023,8 @@ Result<::avro::NodePtr> CreateUnionNodeWithFieldIds(const ::avro::NodePtr& origi
977
1023
978
1024
} // namespace
979
1025
980
- Result<::avro::NodePtr> CreateAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
981
- const MappedField& mapped_field) {
1026
+ Result<::avro::NodePtr> MakeAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1027
+ const MappedField& mapped_field) {
982
1028
switch (original_node->type ()) {
983
1029
case ::avro::AVRO_RECORD:
984
1030
return CreateRecordNodeWithFieldIds (original_node, mapped_field);
@@ -1006,11 +1052,11 @@ Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& origin
1006
1052
}
1007
1053
}
1008
1054
1009
- Result<::avro::NodePtr> CreateAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1010
- const NameMapping& mapping) {
1055
+ Result<::avro::NodePtr> MakeAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1056
+ const NameMapping& mapping) {
1011
1057
MappedField mapped_field;
1012
1058
mapped_field.nested_mapping = std::make_shared<MappedFields>(mapping.AsMappedFields ());
1013
- return CreateAvroNodeWithFieldIds (original_node, mapped_field);
1059
+ return MakeAvroNodeWithFieldIds (original_node, mapped_field);
1014
1060
}
1015
1061
1016
1062
} // namespace iceberg::avro
0 commit comments