Skip to content

Commit 22e48d8

Browse files
committed
fix and improve: VAE tiling
- properly handle the upper-left corner by interpolating in both x and y
- refactor out lerp
- use smootherstep to preserve more detail and spend less area blending
1 parent 8847114 commit 22e48d8

File tree

1 file changed

+36
-5
lines changed

1 file changed

+36
-5
lines changed

ggml_extend.hpp

+36-5
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,16 @@ __STATIC_INLINE__ void ggml_split_tensor_2d(struct ggml_tensor* input,
349349
}
350350
}
351351

352+
// Linear interpolation between two floats.
//
// a: value returned when x == 0
// b: value returned when x == 1
// x: blend factor; it is not clamped here, so values outside [0, 1] extrapolate
//
// Uses the weighted-sum form (1 - x) * a + x * b.
__STATIC_INLINE__ float ggml_lerp_f32(const float a, const float b, const float x) {
    const float weight_a = 1 - x;
    const float weight_b = x;
    return weight_a * a + weight_b * b;
}
355+
356+
// unclamped -> expects x in the range [0-1]
357+
__STATIC_INLINE__ float ggml_smootherstep_f32(const float x) {
358+
GGML_ASSERT(x >= 0.f && x <= 1.f);
359+
return x * x * x * (x * (6.0f * x - 15.0f) + 10.0f);
360+
}
361+
352362
__STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
353363
struct ggml_tensor* output,
354364
int x,
@@ -364,12 +374,33 @@ __STATIC_INLINE__ void ggml_merge_tensor_2d(struct ggml_tensor* input,
364374
float new_value = ggml_tensor_get_f32(input, ix, iy, k);
365375
if (overlap > 0) { // blend colors in overlapped area
366376
float old_value = ggml_tensor_get_f32(output, x + ix, y + iy, k);
367-
if (x > 0 && ix < overlap) { // in overlapped horizontal
368-
ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (ix / (1.0f * overlap)), x + ix, y + iy, k);
377+
const bool inside_x_overlap = x > 0 && ix < overlap;
378+
const bool inside_y_overlap = y > 0 && iy < overlap;
379+
if (inside_x_overlap && inside_y_overlap) {
380+
// upper left corner needs to be interpolated in both directions
381+
const float x_f = ix / float(overlap);
382+
const float y_f = iy / float(overlap);
383+
// TODO: try `x+y - 1`
384+
const float f = std::min(x_f, y_f); // min of both
385+
ggml_tensor_set_f32(
386+
output,
387+
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(f)),
388+
x + ix, y + iy, k
389+
);
369390
continue;
370-
}
371-
if (y > 0 && iy < overlap) { // in overlapped vertical
372-
ggml_tensor_set_f32(output, old_value + (new_value - old_value) * (iy / (1.0f * overlap)), x + ix, y + iy, k);
391+
} else if (inside_x_overlap) {
392+
ggml_tensor_set_f32(
393+
output,
394+
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(ix / float(overlap))),
395+
x + ix, y + iy, k
396+
);
397+
continue;
398+
} else if (inside_y_overlap) {
399+
ggml_tensor_set_f32(
400+
output,
401+
ggml_lerp_f32(old_value, new_value, ggml_smootherstep_f32(iy / float(overlap))),
402+
x + ix, y + iy, k
403+
);
373404
continue;
374405
}
375406
}

0 commit comments

Comments
 (0)