@@ -349,6 +349,16 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input,
349349 }
350350}
351351
352+ __STATIC_INLINE__ float ggml_lerp_f32 (const float a, const float b, const float x) {
353+ return (1 - x) * a + x * b;
354+ }
355+
356+ // unclamped -> expects x in the range [0-1]
357+ __STATIC_INLINE__ float ggml_smootherstep_f32 (const float x) {
358+ GGML_ASSERT (x >= 0 .f && x <= 1 .f );
359+ return x * x * x * (x * (6 .0f * x - 15 .0f ) + 10 .0f );
360+ }
361+
352362__STATIC_INLINE__ void ggml_merge_tensor_2d (struct ggml_tensor * input,
353363 struct ggml_tensor * output,
354364 int x,
@@ -364,12 +374,33 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
364374 float new_value = ggml_tensor_get_f32 (input, ix, iy, k);
365375 if (overlap > 0 ) { // blend colors in overlapped area
366376 float old_value = ggml_tensor_get_f32 (output, x + ix, y + iy, k);
367- if (x > 0 && ix < overlap) { // in overlapped horizontal
368- ggml_tensor_set_f32 (output, old_value + (new_value - old_value) * (ix / (1 .0f * overlap)), x + ix, y + iy, k);
377+ const bool inside_x_overlap = x > 0 && ix < overlap;
378+ const bool inside_y_overlap = y > 0 && iy < overlap;
379+ if (inside_x_overlap && inside_y_overlap) {
380+ // upper left corner needs to be interpolated in both directions
381+ const float x_f = ix / float (overlap);
382+ const float y_f = iy / float (overlap);
383+ // TODO: try `x+y - 1`
384+ const float f = std::min (x_f, y_f); // min of both
385+ ggml_tensor_set_f32 (
386+ output,
387+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (f)),
388+ x + ix, y + iy, k
389+ );
369390 continue ;
370- }
371- if (y > 0 && iy < overlap) { // in overlapped vertical
372- ggml_tensor_set_f32 (output, old_value + (new_value - old_value) * (iy / (1 .0f * overlap)), x + ix, y + iy, k);
391+ } else if (inside_x_overlap) {
392+ ggml_tensor_set_f32 (
393+ output,
394+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (ix / float (overlap))),
395+ x + ix, y + iy, k
396+ );
397+ continue ;
398+ } else if (inside_y_overlap) {
399+ ggml_tensor_set_f32 (
400+ output,
401+ ggml_lerp_f32 (old_value, new_value, ggml_smootherstep_f32 (iy / float (overlap))),
402+ x + ix, y + iy, k
403+ );
373404 continue ;
374405 }
375406 }
0 commit comments