@@ -788,10 +788,6 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
788
788
original_node->names ());
789
789
}
790
790
const std::string& field_name = original_node->nameAt (i);
791
- if (i >= original_node->leaves ()) {
792
- return InvalidSchema (" Index {} is out of bounds for leaves (size: {})" , i,
793
- original_node->leaves ());
794
- }
795
791
::avro::NodePtr field_node = original_node->leafAt (i);
796
792
797
793
// TODO(liuxiaoyu): Add support for case sensitivity in name matching.
@@ -821,6 +817,7 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
821
817
const auto & original_attrs = original_node->customAttributesAt (i);
822
818
const auto & existing_attrs = original_attrs.attributes ();
823
819
for (const auto & attr_pair : existing_attrs) {
820
+ // Copy each existing attribute to preserve original metadata
824
821
attributes.addAttribute (attr_pair.first , attr_pair.second , false );
825
822
}
826
823
}
@@ -833,7 +830,7 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
833
830
834
831
// Recursively apply field IDs to nested fields
835
832
ICEBERG_ASSIGN_OR_RAISE (auto new_nested_node,
836
- CreateAvroNodeWithFieldIds (field_node, *nested_field));
833
+ MakeAvroNodeWithFieldIds (field_node, *nested_field));
837
834
new_record_node->addName (field_name);
838
835
new_record_node->addLeaf (new_nested_node);
839
836
} else {
@@ -856,7 +853,7 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
856
853
// Check if this is a map represented as array
857
854
if (HasMapLogicalType (original_node)) {
858
855
ICEBERG_ASSIGN_OR_RAISE (auto new_element_node,
859
- CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), field));
856
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), field));
860
857
new_array_node->addLeaf (new_element_node);
861
858
return new_array_node;
862
859
}
@@ -881,8 +878,14 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
881
878
882
879
ICEBERG_ASSIGN_OR_RAISE (
883
880
auto new_element_node,
884
- CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), *element_field));
881
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), *element_field));
885
882
new_array_node->addLeaf (new_element_node);
883
+
884
+ // Add element field ID as custom attribute
885
+ ::avro::CustomAttributes element_attributes;
886
+ element_attributes.addAttribute (std::string (kFieldIdProp ),
887
+ std::to_string (*element_field->field_id ), false );
888
+ new_array_node->addCustomAttributesForField (element_attributes);
886
889
} else {
887
890
// If no element field found, this is an error
888
891
return InvalidSchema (" Element field not found in nested mapping for array" );
@@ -899,32 +902,77 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
899
902
900
903
auto new_map_node = std::make_shared<::avro::NodeMap>();
901
904
902
- // For map types, we use fixed field IDs for key and value
903
- // Key field gets field ID 0, value field gets field ID 1
904
- constexpr int32_t kMapKeyFieldId = 0 ;
905
- constexpr int32_t kMapValueFieldId = 1 ;
905
+ // For map types, we need to extract key and value field mappings from the nested
906
+ // mapping
907
+ if (!field.nested_mapping ) {
908
+ return InvalidSchema (" Map type requires nested mapping for key and value fields" );
909
+ }
910
+
911
+ // Find key and value field mappings by name
912
+ std::optional<int32_t > key_id = field.nested_mapping ->Id (" key" );
913
+ std::optional<int32_t > value_id = field.nested_mapping ->Id (" value" );
914
+
915
+ if (!key_id || !value_id) {
916
+ return InvalidSchema (" Map type requires both 'key' and 'value' field mappings" );
917
+ }
918
+
919
+ std::optional<MappedFieldConstRef> key_field_ref = field.nested_mapping ->Field (*key_id);
920
+ std::optional<MappedFieldConstRef> value_field_ref =
921
+ field.nested_mapping ->Field (*value_id);
922
+
923
+ if (!key_field_ref || !value_field_ref) {
924
+ return InvalidSchema (" Map type requires both key and value field mappings" );
925
+ }
926
+
927
+ const auto & key_mapped_field = key_field_ref->get ();
928
+ const auto & value_mapped_field = value_field_ref->get ();
906
929
907
- // Create key field with fixed field ID
930
+ if (!key_mapped_field.field_id || !value_mapped_field.field_id ) {
931
+ return InvalidSchema (" Map key and value fields must have field IDs" );
932
+ }
933
+
934
+ // Create key field with mapped field ID
908
935
MappedField key_field;
909
- key_field.field_id = kMapKeyFieldId ;
910
- key_field.nested_mapping =
911
- field.nested_mapping ; // Pass through nested mapping for complex key types
936
+ key_field.field_id = *key_mapped_field.field_id ;
937
+ key_field.nested_mapping = key_mapped_field.nested_mapping ;
912
938
913
- // Create value field with fixed field ID
939
+ // Create value field with mapped field ID
914
940
MappedField value_field;
915
- value_field.field_id = kMapValueFieldId ;
916
- value_field.nested_mapping =
917
- field.nested_mapping ; // Pass through nested mapping for complex value types
941
+ value_field.field_id = *value_mapped_field.field_id ;
942
+ value_field.nested_mapping = value_mapped_field.nested_mapping ;
918
943
919
944
// Add key and value nodes
920
- ICEBERG_ASSIGN_OR_RAISE (
921
- auto new_key_node, CreateAvroNodeWithFieldIds (original_node->leafAt (0 ), key_field));
945
+ ICEBERG_ASSIGN_OR_RAISE (auto new_key_node,
946
+ MakeAvroNodeWithFieldIds (original_node->leafAt (0 ), key_field));
922
947
ICEBERG_ASSIGN_OR_RAISE (
923
948
auto new_value_node,
924
- CreateAvroNodeWithFieldIds (original_node->leafAt (1 ), value_field));
949
+ MakeAvroNodeWithFieldIds (original_node->leafAt (1 ), value_field));
925
950
new_map_node->addLeaf (new_key_node);
926
951
new_map_node->addLeaf (new_value_node);
927
952
953
+ // Preserve existing custom attributes from the original node and add field ID
954
+ // attributes. Copy existing attributes from the original node (if any).
955
+ if (original_node->customAttributes () > 0 ) {
956
+ const auto & original_attrs = original_node->customAttributesAt (0 );
957
+ const auto & existing_attrs = original_attrs.attributes ();
958
+ for (const auto & attr_pair : existing_attrs) {
959
+ // Copy each existing attribute to preserve original metadata
960
+ ::avro::CustomAttributes attributes;
961
+ attributes.addAttribute (attr_pair.first , attr_pair.second , false );
962
+ new_map_node->addCustomAttributesForField (attributes);
963
+ }
964
+ }
965
+
966
+ ::avro::CustomAttributes key_attributes;
967
+ key_attributes.addAttribute (std::string (kFieldIdProp ),
968
+ std::to_string (*key_mapped_field.field_id ), false );
969
+ new_map_node->addCustomAttributesForField (key_attributes);
970
+
971
+ ::avro::CustomAttributes value_attributes;
972
+ value_attributes.addAttribute (std::string (kFieldIdProp ),
973
+ std::to_string (*value_mapped_field.field_id ), false );
974
+ new_map_node->addCustomAttributesForField (value_attributes);
975
+
928
976
return new_map_node;
929
977
}
930
978
@@ -942,16 +990,14 @@ Result<::avro::NodePtr> CreateUnionNodeWithFieldIds(const ::avro::NodePtr& origi
942
990
943
991
if (branch_0_is_null && !branch_1_is_null) {
944
992
// branch_0 is null, branch_1 is not null
945
- ICEBERG_ASSIGN_OR_RAISE (auto new_branch_1,
946
- CreateAvroNodeWithFieldIds (branch_1, field));
993
+ ICEBERG_ASSIGN_OR_RAISE (auto new_branch_1, MakeAvroNodeWithFieldIds (branch_1, field));
947
994
auto new_union_node = std::make_shared<::avro::NodeUnion>();
948
995
new_union_node->addLeaf (branch_0); // null branch
949
996
new_union_node->addLeaf (new_branch_1);
950
997
return new_union_node;
951
998
} else if (!branch_0_is_null && branch_1_is_null) {
952
999
// branch_0 is not null, branch_1 is null
953
- ICEBERG_ASSIGN_OR_RAISE (auto new_branch_0,
954
- CreateAvroNodeWithFieldIds (branch_0, field));
1000
+ ICEBERG_ASSIGN_OR_RAISE (auto new_branch_0, MakeAvroNodeWithFieldIds (branch_0, field));
955
1001
auto new_union_node = std::make_shared<::avro::NodeUnion>();
956
1002
new_union_node->addLeaf (new_branch_0);
957
1003
new_union_node->addLeaf (branch_1); // null branch
@@ -967,8 +1013,8 @@ Result<::avro::NodePtr> CreateUnionNodeWithFieldIds(const ::avro::NodePtr& origi
967
1013
968
1014
} // namespace
969
1015
970
- Result<::avro::NodePtr> CreateAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
971
- const MappedField& mapped_field) {
1016
+ Result<::avro::NodePtr> MakeAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1017
+ const MappedField& mapped_field) {
972
1018
switch (original_node->type ()) {
973
1019
case ::avro::AVRO_RECORD:
974
1020
return CreateRecordNodeWithFieldIds (original_node, mapped_field);
@@ -996,11 +1042,11 @@ Result<::avro::NodePtr> CreateAvroNodeWithFieldIds(const ::avro::NodePtr& origin
996
1042
}
997
1043
}
998
1044
999
// NameMapping overload. This diff renames it from CreateAvroNodeWithFieldIds to
// MakeAvroNodeWithFieldIds; the body is otherwise unchanged.
//
// original_node: the Avro node to process.
// mapping:       the name mapping; its AsMappedFields() result is copied into a
//                shared MappedFields instance.
// Returns the result of the MappedField overload, unchanged.
- Result<::avro::NodePtr> CreateAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1000
- const NameMapping& mapping) {
1045
+ Result<::avro::NodePtr> MakeAvroNodeWithFieldIds (const ::avro::NodePtr& original_node,
1046
+ const NameMapping& mapping) {
1001
1047
// Wrap the mapping in a synthetic MappedField. Only nested_mapping is assigned
// here; no other member of mapped_field is set by this function.
MappedField mapped_field;
1002
1048
mapped_field.nested_mapping = std::make_shared<MappedFields>(mapping.AsMappedFields ());
1003
// Delegate to the MappedField overload, which dispatches on the node type.
- return CreateAvroNodeWithFieldIds (original_node, mapped_field);
1049
+ return MakeAvroNodeWithFieldIds (original_node, mapped_field);
1004
1050
}
1005
1051
1006
1052
} // namespace iceberg::avro
0 commit comments