@@ -82,27 +82,6 @@ struct IntDivider {
Value divisor;
};

- // static inline std::vector<int64_t> infer_size_dimvector(std::vector<int64_t>
- // a, std::vector<int64_t> b) {
- // // Use ptrdiff_t to ensure signed comparison.
- // auto dimsA = a.size();
- // auto dimsB = b.size();
- // auto ndim = dimsA > dimsB ? dimsA : dimsB;
- // std::vector<int64_t> expandedSizes = std::vector<int64_t> (ndim, 0);
-
- // for (int64_t i = ndim - 1; i >= 0; --i) {
- // int64_t offset = ndim - 1 - i;
- // int64_t dimA = dimsA - 1 - offset;
- // int64_t dimB = dimsB - 1 - offset;
- // auto sizeA = (dimA >= 0) ? a[dimA] : 1;
- // auto sizeB = (dimB >= 0) ? b[dimB] : 1;
-
- // expandedSizes[i] = sizeA == 1 ? sizeB : sizeA;
- // }
-
- // return expandedSizes;
- // }
-
template <int NARGS, typename index_t = uint32_t, bool signed_strides = false>
struct OffsetCalculator {
using stride_t =
@@ -153,358 +132,9 @@ struct OffsetCalculator {
stride_t strides_[MAX_DIMS][std::max<int>(NARGS, 1)];
};

- // static inline std::vector<int64_t> compute_strides(
- // std::vector<int64_t> input_dims, // value_tensor
- // std::vector<int64_t> input_strides,
- // int64_t input_elesize,
- // int64_t ndim,
- // std::vector<int64_t> shape_,
- // std::vector<int64_t> &stride_size
- // ) {
- // std::vector<int64_t> stride_bytes(ndim, 0);
- // const auto& original_shape = input_dims;
- // const auto& original_stride = input_strides;
- // int64_t element_size_in_bytes = input_elesize;
- // int offset = ndim - original_shape.size();
-
- // if (offset > 0)
- // stride_bytes.resize(ndim, 0);
- // else
- // stride_bytes.resize(ndim);
- // for (int i=0; i<original_shape.size(); i++) {
- // if (original_shape[i] == 1 && shape_[offset + i] !=1) {
- // stride_bytes[offset + i] = 0;
- // } else {
- // stride_bytes[offset + i] = original_stride[i] *
- // element_size_in_bytes;
- // }
- // }
- // stride_size.push_back(stride_bytes.size());
- // return stride_bytes;
- // }
-
- // static inline std::vector<int64_t> compute_shapes(
- // std::vector<std::vector<int64_t>> input_dims
- // ) {
- // std::vector<int64_t> shape_;
- // for (size_t i=0; i<input_dims.size(); i++) {
- // auto shape = input_dims[i];
- // if (shape_.empty()) {
- // shape_ = shape;
- // } else if (!(shape == shape_)) {
- // shape_ = infer_size_dimvector(shape_, shape);
- // }
- // }
- // return shape_;
- // }
-
- // template <int N>
- // static inline void permute_dimensions(
- // std::array<int64_t*, N>& strides_array,
- // std::vector<int64_t>& stride_size,
- // std::vector<int64_t>& perm,
- // std::vector<int64_t>& shape_) {
-
- // auto reorder = [perm](std::vector<int64_t> data) {
- // auto res = std::vector<int64_t>(data.size(), 0);
- // for (int64_t i=0; i<perm.size(); i++) {
- // res[i] = data[perm[i]];
- // }
- // return res;
- // };
-
- // // Update shape and strides
- // shape_ = reorder(shape_);
-
- // static std::array<std::vector<int64_t>, N> temp_strides;
- // for (int64_t i = 0; i < N; i++) {
- // if (strides_array[i] != nullptr) {
- // std::vector<int64_t> original_data(strides_array[i], strides_array[i] +
- // stride_size[i]); temp_strides[i] = reorder(original_data);
- // strides_array[i] = temp_strides[i].data();
- // }
- // }
-
- // }
-
- // template <int N>
- // static inline void reorder_dimensions(
- // std::vector<int64_t>& shape_,
- // std::vector<int64_t>& stride_size,
- // std::array<int64_t*, N>& strides_array) {
- // // Sort the dimensions based on strides in ascending order with reduced
- // dims
- // // at the front. NOTE: that this inverts the order of C-contiguous tensors.
- // // strides[0] is the fastest moving dimension instead of strides[ndim - 1].
- // // See NOTE: [Computing output strides] and inline comments for more
- // detailed description
-
- // auto ndim = shape_.size();
- // std::vector<int64_t> perm_;
-
- // perm_.resize(ndim);
- // if (ndim == 1) {
- // perm_[0] = 0;
- // return;
- // }
-
- // // initialize perm with n-1, n-2, ..., 1, 0
- // std::iota(perm_.rbegin(), perm_.rend(), 0);
-
- // // Reordering dimensions changes iteraton order
- // // if (enforce_linear_iteration_) {
- // // permute_dimensions(perm_);
- // // return;
- // // }
-
- // // returns 1 if the dim0 should come after dim1, -1 if dim0 should come
- // // before dim1, and 0 if the comparison is ambiguous.
- // auto should_swap = [&](size_t dim0, size_t dim1) {
- // for (int64_t arg=0; arg < N; arg++) {
- // // ignore undefined or incorrectly sized tensors
- // if (strides_array[arg] == nullptr) {
- // continue;
- // }
- // int64_t stride0 = strides_array[arg][dim0];
- // int64_t stride1 = strides_array[arg][dim1];
- // //move on to the next input if one of the dimensions is broadcasted
- // if (stride0 == 0 || stride1 == 0) {
- // continue;
- // // it is important to return here only with strict comparisons, for
- // equal strides we try to break the tie later
- // // by comparing corresponding dimensions or if that does not work,
- // moving on to the next tensor } else if (stride0 < stride1) {
- // return -1;
- // } else if (stride0 > stride1) {
- // return 1;
- // } else { //equal strides, use dimensions themselves as the tie-breaker.
- // //at this point, with zero strides out of the way, we are guaranteed
- // that operand dimensions are equal to shape_
- // auto t_dim0 = shape_[dim0];
- // auto t_dim1 = shape_[dim1];
- // //return only if dimensions should be swapped, otherwise move on to
- // the next tensor if (t_dim0 > t_dim1) {
- // return 1;
- // }
- // }
- // }
- // return 0;
- // };
- // // insertion sort with support for ambiguous comparisons
- // for (int64_t i=0; i<ndim; i++) {
- // int dim1 = i;
- // for (int dim0 = i - 1; dim0 >= 0; dim0--) {
- // int comparison = should_swap(perm_[dim0], perm_[dim1]);
- // if (comparison > 0) {
- // std::swap(perm_[dim0], perm_[dim1]);
- // dim1 = dim0;
- // } else if (comparison < 0) {
- // break;
- // }
- // }
- // }
-
- // // perform re-ordering of shape and strides
- // permute_dimensions<N>(strides_array, stride_size, perm_, shape_);
- // }
-
- // template<int N>
- // void coalesce_dimensions(
- // int64_t ndim,
- // std::array<int64_t*, N>& strides_array,
- // std::vector<int64_t> &stride_size,
- // std::vector<int64_t> &shape_
- // ) {
- // for (size_t i=0; i<N; i++) {
- // int64_t* stride_tmp = strides_array[i];
- // }
-
- // if (ndim <= 1) {
- // return;
- // }
-
- // // We can coalesce two adjacent dimensions if either dim has size 1 or if:
- // // shape[n] * stride[n] == stride[n + 1].
- // auto can_coalesce = [&](int dim0, int dim1) {
- // auto shape0 = shape_[dim0];
- // auto shape1 = shape_[dim1];
- // if (shape0 == 1 || shape1 == 1) {
- // return true;
- // }
- // for (int64_t i=0; i<N; i++) {
- // auto& stride = strides_array[i];
- // if (shape0 * stride[dim0] != stride[dim1]) {
- // return false;
- // }
- // }
- // return true;
- // };
-
- // // replace each operands stride at dim0 with its stride at dim1
- // auto replace_stride = [&](int dim0, int dim1) {
- // for (int64_t i=0; i<N; i++) {
- // auto& stride = strides_array[i];
- // stride[dim0] = stride[dim1];
- // }
- // };
-
- // int prev_dim = 0;
- // for (int64_t dim=1; dim<ndim; dim++) {
- // if (can_coalesce(prev_dim, dim)) {
- // if (shape_[prev_dim] == 1) {
- // replace_stride(prev_dim, dim);
- // }
- // shape_[prev_dim] *= shape_[dim];
- // } else {
- // prev_dim++;
- // if (prev_dim != dim) {
- // replace_stride(prev_dim, dim);
- // shape_[prev_dim] = shape_[dim];
- // }
- // }
- // }
- // shape_.resize(prev_dim + 1);
- // for (int64_t i=0; i<N; i++) {
- // stride_size[i] = shape_.size();
- // }
- // }
-
- // template <int N, bool signed_strides = false>
- // static OffsetCalculator<N, uint32_t, signed_strides>
- // make_offset_calculator_put(
- // IndexPutStride index_put_stride
- // ) {
- // return OffsetCalculator<N, uint32_t, signed_strides>(
- // index_put_stride.desired_shape_.size(),
- // index_put_stride.desired_shape_.data(),
- // index_put_stride.strides_array_.data());
- // }
-
- // template <int N, bool signed_strides = false>
- // static OffsetCalculator<N, uint32_t, signed_strides>
- // make_offset_calculator_put(
- // std::vector<int64_t> output_dims, // value_tensor
- // std::vector<int64_t> output_strides,
- // int64_t output_elesize,
- // std::vector<int64_t> input_dims, // input_tensor
- // std::vector<int64_t> input_strides,
- // int64_t input_elesize,
- // std::vector<int64_t> index_dims, // index_tensor
- // std::vector<int64_t> index_strides,
- // int64_t index_elesize,
- // int64_t &numel
- // ) {
- // int ndim = output_dims.size();
- // // need a 2D stride vector to hold stride for each
- // std::array<int64_t*, N> strides_array;
- // std::array<std::vector<int64_t>, N> strides_vec;
- // std::vector<int64_t> stride_size;
-
- // std::vector<int64_t> desired_shape = compute_shapes(
- // {input_dims, output_dims, index_dims}
- // );
-
- // // dangling pointer
- // strides_vec[0] = compute_strides(
- // output_dims, // input_tensor
- // output_strides,
- // output_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // strides_vec[1] = compute_strides(
- // input_dims, // value_tensor
- // input_strides,
- // input_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // strides_vec[2] = compute_strides(
- // index_dims, // index_tensor
- // index_strides,
- // index_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // for (size_t i=0; i<N; i++) {
- // strides_array[i] = strides_vec[i].data();
- // }
-
- // reorder_dimensions<N>(desired_shape, stride_size, strides_array);
-
- // coalesce_dimensions<N>(ndim, strides_array, stride_size, desired_shape);
-
- // int num = 1;
- // for (int i=0; i<desired_shape.size(); i++) {
- // num *= desired_shape[i];
- // }
- // numel = num;
-
- // return OffsetCalculator<N, uint32_t, signed_strides>(
- // desired_shape.size(), desired_shape.data(), strides_array.data());
- // }
-
template <int N, bool signed_strides = false>
static OffsetCalculator<N, uint32_t, signed_strides> make_offset_calculator_put(
std::vector<int64_t> desired_shape, std::array<int64_t*, N> strides_array) {
- // int ndim = output_dims.size();
- // // need a 2D stride vector to hold stride for each
- // std::array<int64_t*, N> strides_array;
- // std::array<std::vector<int64_t>, N> strides_vec;
- // std::vector<int64_t> stride_size;
-
- // std::vector<int64_t> desired_shape = compute_shapes(
- // {input_dims, output_dims, index_dims}
- // );
-
- // // dangling pointer
- // strides_vec[0] = compute_strides(
- // output_dims, // input_tensor
- // output_strides,
- // output_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // strides_vec[1] = compute_strides(
- // input_dims, // value_tensor
- // input_strides,
- // input_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // strides_vec[2] = compute_strides(
- // index_dims, // index_tensor
- // index_strides,
- // index_elesize,
- // ndim,
- // desired_shape,
- // stride_size
- // );
-
- // for (size_t i=0; i<N; i++) {
- // strides_array[i] = strides_vec[i].data();
- // }
-
- // reorder_dimensions<N>(desired_shape, stride_size, strides_array);
-
- // coalesce_dimensions<N>(ndim, strides_array, stride_size, desired_shape);
-
- // int num = 1;
- // for (int i=0; i<desired_shape.size(); i++) {
- // num *= desired_shape[i];
- // }
- // numel = num;
-
return OffsetCalculator<N, uint32_t, signed_strides>(
desired_shape.size(), desired_shape.data(), strides_array.data());
}
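
For context, the overload kept above only forwards to the OffsetCalculator constructor. A minimal caller-side sketch follows; the concrete shapes, byte strides, three-operand layout, and the get() accessor mentioned in the comments are illustrative assumptions, not part of this commit.

#include <array>
#include <cstdint>
#include <vector>

// Hypothetical usage sketch: the caller precomputes the broadcasted iteration
// shape and per-operand byte strides, then hands raw pointers to
// make_offset_calculator_put. All concrete values here are made up.
static void example_make_offset_calculator_put() {
  std::vector<int64_t> desired_shape = {4, 8};   // broadcasted iteration shape
  std::vector<int64_t> out_strides   = {32, 4};  // contiguous float32 output, in bytes
  std::vector<int64_t> val_strides   = {0, 4};   // value tensor broadcast along dim 0
  std::vector<int64_t> idx_strides   = {8, 0};   // index tensor broadcast along dim 1

  std::array<int64_t*, 3> strides_array = {
      out_strides.data(), val_strides.data(), idx_strides.data()};

  // N = 3 operands; uint32_t offsets and unsigned strides are the defaults.
  auto offset_calc = make_offset_calculator_put<3>(desired_shape, strides_array);
  (void)offset_calc;
  // Assuming OffsetCalculator exposes a get(linear_index) accessor (as the
  // upstream PyTorch helper does), a kernel would recover per-operand byte
  // offsets with: auto offs = offset_calc.get(i);
}

Pushing the shape/stride computation to the caller appears to be what lets the commented-out compute_shapes, compute_strides, reorder_dimensions, and coalesce_dimensions helpers be deleted in this commit.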