Skip to content

Commit a7ff3db

Browse files
Add support for no metadata
1 parent b228adb commit a7ff3db

File tree

1 file changed

+66
-42
lines changed

1 file changed

+66
-42
lines changed

native/perform_etl.cc

Lines changed: 66 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -969,7 +969,12 @@ void join_and_write_single(
969969
}
970970
}
971971

972-
auto metadata_type = std::make_shared<arrow::StructType>(metadata_fields);
972+
std::shared_ptr<arrow::DataType> metadata_type;
973+
if (metadata_columns.size() != 0) {
974+
metadata_type = std::make_shared<arrow::StructType>(metadata_fields);
975+
} else {
976+
metadata_type = std::make_shared<arrow::FloatType>();
977+
}
973978

974979
auto timestamp_type =
975980
std::make_shared<arrow::TimestampType>(arrow::TimeUnit::MICRO);
@@ -984,6 +989,7 @@ void join_and_write_single(
984989

985990
arrow::field("metadata", metadata_type),
986991
};
992+
987993
auto measurement_type =
988994
std::make_shared<arrow::StructType>(measurement_type_fields);
989995

@@ -1074,29 +1080,41 @@ void join_and_write_single(
10741080
metadata_columns.size());
10751081
std::vector<std::shared_ptr<arrow::FixedSizeBinaryBuilder>>
10761082
primitive_metadata_builders(metadata_columns.size());
1077-
std::vector<std::shared_ptr<arrow::ArrayBuilder>> metadata_builders(
1078-
metadata_columns.size());
1079-
for (size_t i = 0; i < metadata_columns.size(); i++) {
1080-
if (is_text_metadata[i]) {
1081-
auto builder = std::make_shared<arrow::StringBuilder>(pool);
1082-
text_metadata_builders[i] = builder;
1083-
metadata_builders[i] = builder;
1084-
} else {
1085-
auto builder = std::make_shared<arrow::FixedSizeBinaryBuilder>(
1086-
std::make_shared<arrow::FixedSizeBinaryType>(
1087-
metadata_columns[i].second->byte_width()));
1088-
primitive_metadata_builders[i] = builder;
1089-
metadata_builders[i] = builder;
1083+
1084+
std::shared_ptr<arrow::StructBuilder> metadata_builder;
1085+
std::shared_ptr<arrow::FloatBuilder> null_metadata_builder;
1086+
std::shared_ptr<arrow::ArrayBuilder> metadata_builder_holder;
1087+
1088+
if (metadata_columns.size() != 0) {
1089+
std::vector<std::shared_ptr<arrow::ArrayBuilder>> metadata_builders(
1090+
metadata_columns.size());
1091+
for (size_t i = 0; i < metadata_columns.size(); i++) {
1092+
if (is_text_metadata[i]) {
1093+
auto builder = std::make_shared<arrow::StringBuilder>(pool);
1094+
text_metadata_builders[i] = builder;
1095+
metadata_builders[i] = builder;
1096+
} else {
1097+
auto builder = std::make_shared<arrow::FixedSizeBinaryBuilder>(
1098+
std::make_shared<arrow::FixedSizeBinaryType>(
1099+
metadata_columns[i].second->byte_width()));
1100+
primitive_metadata_builders[i] = builder;
1101+
metadata_builders[i] = builder;
1102+
}
10901103
}
1091-
}
10921104

1093-
auto metadata_builder = std::make_shared<arrow::StructBuilder>(
1094-
metadata_type, pool, metadata_builders);
1105+
metadata_builder = std::make_shared<arrow::StructBuilder>(
1106+
metadata_type, pool, metadata_builders);
1107+
metadata_builder_holder = metadata_builder;
1108+
} else {
1109+
null_metadata_builder = std::make_shared<arrow::FloatBuilder>(pool);
1110+
metadata_builder_holder = null_metadata_builder;
1111+
}
10951112

10961113
std::vector<std::shared_ptr<arrow::ArrayBuilder>>
1097-
measurement_builder_fields{code_builder, text_value_builder,
1098-
numeric_value_builder,
1099-
datetime_value_builder, metadata_builder};
1114+
measurement_builder_fields{
1115+
code_builder, text_value_builder, numeric_value_builder,
1116+
datetime_value_builder, metadata_builder_holder};
1117+
11001118
auto measurement_builder = std::make_shared<arrow::StructBuilder>(
11011119
measurement_type, pool, measurement_builder_fields);
11021120

@@ -1125,6 +1143,7 @@ void join_and_write_single(
11251143

11261144
std::shared_ptr<arrow::Table> table =
11271145
arrow::Table::Make(schema, columns);
1146+
11281147
PARQUET_THROW_NOT_OK(writer->WriteTable(*table));
11291148

11301149
amount_written = 0;
@@ -1211,29 +1230,34 @@ void join_and_write_single(
12111230
PARQUET_THROW_NOT_OK(text_value_builder->AppendNull());
12121231
}
12131232

1214-
PARQUET_THROW_NOT_OK(metadata_builder->Append());
1215-
for (size_t j = 0; j < metadata_columns.size(); j++) {
1216-
if (non_null[3 + j]) {
1217-
size_t size = *reinterpret_cast<const size_t*>(
1218-
patient_record.substr(offset).data());
1219-
offset += sizeof(size);
1220-
auto entry = patient_record.substr(offset, size);
1221-
1222-
if (is_text_metadata[j]) {
1223-
PARQUET_THROW_NOT_OK(
1224-
text_metadata_builders[j]->Append(entry));
1225-
} else {
1226-
PARQUET_THROW_NOT_OK(
1227-
primitive_metadata_builders[j]->Append(entry));
1228-
}
1229-
offset += size;
1230-
} else {
1231-
if (is_text_metadata[j]) {
1232-
PARQUET_THROW_NOT_OK(
1233-
text_metadata_builders[j]->AppendNull());
1233+
if (metadata_columns.size() == 0) {
1234+
PARQUET_THROW_NOT_OK(null_metadata_builder->AppendNull());
1235+
} else {
1236+
PARQUET_THROW_NOT_OK(metadata_builder->Append());
1237+
1238+
for (size_t j = 0; j < metadata_columns.size(); j++) {
1239+
if (non_null[3 + j]) {
1240+
size_t size = *reinterpret_cast<const size_t*>(
1241+
patient_record.substr(offset).data());
1242+
offset += sizeof(size);
1243+
auto entry = patient_record.substr(offset, size);
1244+
1245+
if (is_text_metadata[j]) {
1246+
PARQUET_THROW_NOT_OK(
1247+
text_metadata_builders[j]->Append(entry));
1248+
} else {
1249+
PARQUET_THROW_NOT_OK(
1250+
primitive_metadata_builders[j]->Append(entry));
1251+
}
1252+
offset += size;
12341253
} else {
1235-
PARQUET_THROW_NOT_OK(
1236-
primitive_metadata_builders[j]->AppendNull());
1254+
if (is_text_metadata[j]) {
1255+
PARQUET_THROW_NOT_OK(
1256+
text_metadata_builders[j]->AppendNull());
1257+
} else {
1258+
PARQUET_THROW_NOT_OK(
1259+
primitive_metadata_builders[j]->AppendNull());
1260+
}
12371261
}
12381262
}
12391263
}

0 commit comments

Comments
 (0)