@@ -349,6 +349,16 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input,
349
349
}
350
350
}
351
351
352
+ __STATIC_INLINE__ float ggml_lerp_f32 (const float a, const float b, const float x) {
353
+ return (1 - x) * a + x * b;
354
+ }
355
+
356
+ // unclamped -> expects x in the range [0-1]
357
+ __STATIC_INLINE__ float ggml_smootherstep_f32 (const float x) {
358
+ GGML_ASSERT (x >= 0 .f && x <= 1 .f );
359
+ return x * x * x * (x * (6 .0f * x - 15 .0f ) + 10 .0f );
360
+ }
361
+
352
362
__STATIC_INLINE__ void ggml_merge_tensor_2d (struct ggml_tensor * input,
353
363
struct ggml_tensor * output,
354
364
int x,
@@ -364,12 +374,33 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
364
374
float new_value = ggml_tensor_get_f32 (input, ix, iy, k);
365
375
if (overlap > 0 ) { // blend colors in overlapped area
366
376
float old_value = ggml_tensor_get_f32 (output, x + ix, y + iy, k);
367
- if (x > 0 && ix < overlap) { // in overlapped horizontal
368
- ggml_tensor_set_f32 (output, old_value + (new_value - old_value) * (ix / (1 .0f * overlap)), x + ix, y + iy, k);
377
+ const bool inside_x_overlap = x > 0 && ix < overlap;
378
+ const bool inside_y_overlap = y > 0 && iy < overlap;
379
+ if (inside_x_overlap && inside_y_overlap) {
380
+ // upper left corner needs to be interpolated in both directions
381
+ const float x_f = ix / float (overlap);
382
+ const float y_f = iy / float (overlap);
383
+ // TODO: try `x+y - 1`
384
+ const float f = std::min (x_f, y_f); // min of both
385
+ ggml_tensor_set_f32 (
386
+ output,
387
+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (f)),
388
+ x + ix, y + iy, k
389
+ );
369
390
continue ;
370
- }
371
- if (y > 0 && iy < overlap) { // in overlapped vertical
372
- ggml_tensor_set_f32 (output, old_value + (new_value - old_value) * (iy / (1 .0f * overlap)), x + ix, y + iy, k);
391
+ } else if (inside_x_overlap) {
392
+ ggml_tensor_set_f32 (
393
+ output,
394
+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (ix / float (overlap))),
395
+ x + ix, y + iy, k
396
+ );
397
+ continue ;
398
+ } else if (inside_y_overlap) {
399
+ ggml_tensor_set_f32 (
400
+ output,
401
+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (iy / float (overlap))),
402
+ x + ix, y + iy, k
403
+ );
373
404
continue ;
374
405
}
375
406
}
0 commit comments