@@ -342,7 +342,8 @@ class IAggregateFunction : public std::enable_shared_from_this<IAggregateFunctio
342
342
std::function<void (AggregateDataPtr &)> init,
343
343
const UInt8 * key,
344
344
const IColumn ** columns,
345
- Arena * arena) const = 0;
345
+ Arena * arena,
346
+ const IColumn * delta_col = nullptr) const = 0;
346
347
347
348
/* * Insert result of aggregate function into result column with batch size.
348
349
* The implementation of this method will destroy aggregate place up to -State if insert state into result column was successful.
@@ -787,35 +788,81 @@ class IAggregateFunctionHelper : public IAggregateFunction
787
788
std::function<void (AggregateDataPtr &)> init,
788
789
const UInt8 * key,
789
790
const IColumn ** columns,
790
- Arena * arena) const override
791
+ Arena * arena,
792
+ const IColumn * delta_col = nullptr) const override
791
793
{
792
794
static constexpr size_t UNROLL_COUNT = 8 ;
793
795
794
- size_t i = row_begin;
795
-
796
- size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
797
- for (; i < size_unrolled; i += UNROLL_COUNT)
796
+ if (delta_col == nullptr )
798
797
{
799
- AggregateDataPtr places[UNROLL_COUNT];
800
- for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
798
+ // / Fast path. non-changelog
799
+ size_t i = row_begin;
800
+
801
+ size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
802
+ for (; i < size_unrolled; i += UNROLL_COUNT)
801
803
{
802
- AggregateDataPtr & place = map[key[i + j]];
803
- if (unlikely (!place))
804
- init (place);
804
+ AggregateDataPtr places[UNROLL_COUNT];
805
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
806
+ {
807
+ AggregateDataPtr & place = map[key[i + j]];
808
+ if (unlikely (!place))
809
+ init (place);
810
+
811
+ places[j] = place;
812
+ }
805
813
806
- places[j] = place;
814
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
815
+ static_cast <const Derived *>(this )->add (places[j] + place_offset, columns, i + j, arena);
807
816
}
808
817
809
- for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
810
- static_cast <const Derived *>(this )->add (places[j] + place_offset, columns, i + j, arena);
818
+ for (; i < row_end; ++i)
819
+ {
820
+ AggregateDataPtr & place = map[key[i]];
821
+ if (unlikely (!place))
822
+ init (place);
823
+ static_cast <const Derived *>(this )->add (place + place_offset, columns, i, arena);
824
+ }
811
825
}
812
-
813
- for (; i < row_end; ++i)
826
+ else
814
827
{
815
- AggregateDataPtr & place = map[key[i]];
816
- if (unlikely (!place))
817
- init (place);
818
- static_cast <const Derived *>(this )->add (place + place_offset, columns, i, arena);
828
+ // / changelog
829
+ const auto & delta_flags = assert_cast<const ColumnInt8 &>(*delta_col).getData ();
830
+
831
+ size_t i = row_begin;
832
+
833
+ size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
834
+ for (; i < size_unrolled; i += UNROLL_COUNT)
835
+ {
836
+ AggregateDataPtr places[UNROLL_COUNT];
837
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
838
+ {
839
+ AggregateDataPtr & place = map[key[i + j]];
840
+ if (unlikely (!place))
841
+ init (place);
842
+
843
+ places[j] = place;
844
+ }
845
+
846
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
847
+ {
848
+ if (delta_flags[i] >= 0 )
849
+ static_cast <const Derived *>(this )->add (places[j] + place_offset, columns, i + j, arena);
850
+ else
851
+ static_cast <const Derived *>(this )->negate (places[j] + place_offset, columns, i + j, arena);
852
+ }
853
+ }
854
+
855
+ for (; i < row_end; ++i)
856
+ {
857
+ AggregateDataPtr & place = map[key[i]];
858
+ if (unlikely (!place))
859
+ init (place);
860
+
861
+ if (delta_flags[i] >= 0 )
862
+ static_cast <const Derived *>(this )->add (place + place_offset, columns, i, arena);
863
+ else
864
+ static_cast <const Derived *>(this )->negate (place + place_offset, columns, i, arena);
865
+ }
819
866
}
820
867
}
821
868
@@ -940,15 +987,16 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper<Derived>
940
987
std::function<void (AggregateDataPtr &)> init,
941
988
const UInt8 * key,
942
989
const IColumn ** columns,
943
- Arena * arena) const override
990
+ Arena * arena,
991
+ const IColumn * delta_col = nullptr) const override
944
992
{
945
993
const Derived & func = *static_cast <const Derived *>(this );
946
994
947
995
// / If the function is complex or too large, use more generic algorithm.
948
996
949
997
if (func.allocatesMemoryInArena () || sizeof (Data) > 16 || func.sizeOfData () != sizeof (Data))
950
998
{
951
- IAggregateFunctionHelper<Derived>::addBatchLookupTable8 (row_begin, row_end, map, place_offset, init, key, columns, arena);
999
+ IAggregateFunctionHelper<Derived>::addBatchLookupTable8 (row_begin, row_end, map, place_offset, init, key, columns, arena, delta_col );
952
1000
return ;
953
1001
}
954
1002
@@ -962,50 +1010,109 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper<Derived>
962
1010
size_t i = row_begin;
963
1011
964
1012
// / Aggregate data into different lookup tables.
965
-
966
- size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
967
- for (; i < size_unrolled; i += UNROLL_COUNT)
1013
+ if (delta_col == nullptr )
968
1014
{
969
- for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
1015
+ // / Fast path. non-changelog
1016
+ size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
1017
+ for (; i < size_unrolled; i += UNROLL_COUNT)
970
1018
{
971
- size_t idx = j * 256 + key[i + j];
972
- if (unlikely (!has_data[idx]))
1019
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
973
1020
{
974
- new (&places[idx]) Data;
975
- has_data[idx] = true ;
1021
+ size_t idx = j * 256 + key[i + j];
1022
+ if (unlikely (!has_data[idx]))
1023
+ {
1024
+ new (&places[idx]) Data;
1025
+ has_data[idx] = true ;
1026
+ }
1027
+ func.add (reinterpret_cast <char *>(&places[idx]), columns, i + j, nullptr );
976
1028
}
977
- func.add (reinterpret_cast <char *>(&places[idx]), columns, i + j, nullptr );
978
1029
}
979
- }
980
1030
981
- // / Merge data from every lookup table to the final destination.
1031
+ // / Merge data from every lookup table to the final destination.
982
1032
983
- for (size_t k = 0 ; k < 256 ; ++k)
1033
+ for (size_t k = 0 ; k < 256 ; ++k)
1034
+ {
1035
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
1036
+ {
1037
+ size_t idx = j * 256 + k;
1038
+ if (has_data[idx])
1039
+ {
1040
+ AggregateDataPtr & place = map[k];
1041
+ if (unlikely (!place))
1042
+ init (place);
1043
+
1044
+ func.merge (place + place_offset, reinterpret_cast <const char *>(&places[idx]), nullptr );
1045
+ }
1046
+ }
1047
+ }
1048
+
1049
+ // / Process tails and add directly to the final destination.
1050
+
1051
+ for (; i < row_end; ++i)
1052
+ {
1053
+ size_t k = key[i];
1054
+ AggregateDataPtr & place = map[k];
1055
+ if (unlikely (!place))
1056
+ init (place);
1057
+
1058
+ func.add (place + place_offset, columns, i, nullptr );
1059
+ }
1060
+ }
1061
+ else
984
1062
{
985
- for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
1063
+ // / changelog
1064
+ const auto & delta_flags = assert_cast<const ColumnInt8 &>(*delta_col).getData ();
1065
+ size_t size_unrolled = (row_end - row_begin) / UNROLL_COUNT * UNROLL_COUNT;
1066
+ for (; i < size_unrolled; i += UNROLL_COUNT)
986
1067
{
987
- size_t idx = j * 256 + k;
988
- if (has_data[idx])
1068
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
989
1069
{
990
- AggregateDataPtr & place = map[k];
991
- if (unlikely (!place))
992
- init (place);
1070
+ size_t idx = j * 256 + key[i + j];
1071
+ if (unlikely (!has_data[idx]))
1072
+ {
1073
+ new (&places[idx]) Data;
1074
+ has_data[idx] = true ;
1075
+ }
993
1076
994
- func.merge (place + place_offset, reinterpret_cast <const char *>(&places[idx]), nullptr );
1077
+ if (delta_flags[i] >= 0 )
1078
+ func.add (reinterpret_cast <char *>(&places[idx]), columns, i + j, nullptr );
1079
+ else
1080
+ func.negate (reinterpret_cast <char *>(&places[idx]), columns, i + j, nullptr );
995
1081
}
996
1082
}
997
- }
998
1083
999
- // / Process tails and add directly to the final destination.
1084
+ // / Merge data from every lookup table to the final destination.
1000
1085
1001
- for (; i < row_end; ++i)
1002
- {
1003
- size_t k = key[i];
1004
- AggregateDataPtr & place = map[k];
1005
- if (unlikely (!place))
1006
- init (place);
1086
+ for (size_t k = 0 ; k < 256 ; ++k)
1087
+ {
1088
+ for (size_t j = 0 ; j < UNROLL_COUNT; ++j)
1089
+ {
1090
+ size_t idx = j * 256 + k;
1091
+ if (has_data[idx])
1092
+ {
1093
+ AggregateDataPtr & place = map[k];
1094
+ if (unlikely (!place))
1095
+ init (place);
1096
+
1097
+ func.merge (place + place_offset, reinterpret_cast <const char *>(&places[idx]), nullptr );
1098
+ }
1099
+ }
1100
+ }
1101
+
1102
+ // / Process tails and add directly to the final destination.
1103
+
1104
+ for (; i < row_end; ++i)
1105
+ {
1106
+ size_t k = key[i];
1107
+ AggregateDataPtr & place = map[k];
1108
+ if (unlikely (!place))
1109
+ init (place);
1007
1110
1008
- func.add (place + place_offset, columns, i, nullptr );
1111
+ if (delta_flags[i] >= 0 )
1112
+ func.add (place + place_offset, columns, i, nullptr );
1113
+ else
1114
+ func.negate (place + place_offset, columns, i, nullptr );
1115
+ }
1009
1116
}
1010
1117
}
1011
1118
};
0 commit comments