From 0af8c3756e4ae4e01d89b3ff27937628f6bcd7c8 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:04:06 +0200 Subject: [PATCH 01/28] First implementation of caching mechanism as blitcopy optimization. --- src_c/alphablit.c | 133 +++++++++++++++++++++++++++ src_c/simd_blitters.h | 3 + src_c/simd_blitters_avx2.c | 69 ++++++++++++++ src_c/surface.c | 180 +++++++++++++++++++++++++++++++++---- src_c/surface.h | 9 ++ 5 files changed, 375 insertions(+), 19 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index e975218fda..087ca706ad 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -61,6 +61,16 @@ blit_blend_premultiplied(SDL_BlitInfo *info); static int SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, SDL_Rect *dstrect, int blend_flags); + +int +SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + Uint32 ***destinations, Py_ssize_t destinations_size, + PyObject *list); + +static void +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, + Py_ssize_t destinations_size); + extern int SDL_RLESurface(SDL_Surface *surface); extern void @@ -580,6 +590,129 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, return (okay ? 
0 : -1); } +static int +pg_IntFromObj2(PyObject *obj, int *val) +{ + int tmp_val; + + if (PyFloat_Check(obj)) { + double dv = PyFloat_AsDouble(obj); + tmp_val = (int)dv; + } + else { + tmp_val = PyLong_AsLong(obj); + } + + if (tmp_val == -1 && PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + *val = tmp_val; + return 1; +} + +static void +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, + Py_ssize_t destinations_size) +{ +#if !defined(__EMSCRIPTEN__) +#if SDL_BYTEORDER == SDL_LIL_ENDIAN + if (pg_has_avx2()) { + _pg_cached_blitcopy_avx2(src, dst, destinations, destinations_size); + return; + } +#endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */ +#endif /* __EMSCRIPTEN__ */ +} + +int +SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + Uint32 ***destinations, Py_ssize_t destinations_size, + PyObject *list) +{ + int okay; + int src_locked; + int dst_locked; + Uint32 colorkey; + SDL_BlendMode src_blend; + Py_ssize_t i; + + /* Everything is okay at the beginning... 
*/ + okay = 1; + int error = 0; + + /* Lock the destination if it's in hardware */ + dst_locked = 0; + if (SDL_MUSTLOCK(dst)) { + if (SDL_LockSurface(dst) < 0) + okay = 0; + else + dst_locked = 1; + } + /* Lock the source if it's in hardware */ + src_locked = 0; + if (SDL_MUSTLOCK(src)) { + if (SDL_LockSurface(src) < 0) + okay = 0; + else + src_locked = 1; + } + + /* load destinations */ + PyObject **list_items = PySequence_Fast_ITEMS(list); + for (i = 0; i < destinations_size; i++) { + int x, y; + PyObject *tup = list_items[i]; + + if (!PyTuple_Check(tup) || PyTuple_GET_SIZE(tup) != 2) { + error = -1; + break; + } + + if (!pg_IntFromObj2(PyTuple_GET_ITEM(tup, 0), &x) || + !pg_IntFromObj2(PyTuple_GET_ITEM(tup, 1), &y)) { + error = -1; + break; + } + + if (x < 0 || x > dst->w - src->w || y < 0 || y > dst->h - src->h) { + error = -1; + break; + } + + (*destinations)[i] = (Uint32 *)dst->pixels + y * dst->pitch / 4 + x; + } + + /* Set up source and destination buffer pointers, and BLIT! */ + if (okay) { + switch (blend_flags) { + case 0: + /* unhandled cases */ + if (SDL_GetSurfaceBlendMode(src, &src_blend) != 0 || + (src_blend == SDL_BLENDMODE_NONE && src->format->Amask) || + SDL_GetColorKey(src, &colorkey) == 0) { + okay = 0; + break; + } + + /* blitcopy */ + pg_cached_blitcopy(src, dst, *destinations, destinations_size); + break; + default: + okay = 0; + break; + } + } + + /* We need to unlock the surfaces if they're locked */ + if (dst_locked) + SDL_UnlockSurface(dst); + if (src_locked) + SDL_UnlockSurface(src); + /* Blit is done! */ + return (okay ? 
0 : -1); +} + /* --------------------------------------------------------- */ static void diff --git a/src_c/simd_blitters.h b/src_c/simd_blitters.h index 906f60fdae..376650ca6f 100644 --- a/src_c/simd_blitters.h +++ b/src_c/simd_blitters.h @@ -83,3 +83,6 @@ void blit_blend_premultiplied_avx2(SDL_BlitInfo *info); void premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst); +void +_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, + Uint32 **destinations, Py_ssize_t destinations_size); diff --git a/src_c/simd_blitters_avx2.c b/src_c/simd_blitters_avx2.c index d113b2b87e..421f092f68 100644 --- a/src_c/simd_blitters_avx2.c +++ b/src_c/simd_blitters_avx2.c @@ -190,6 +190,41 @@ pg_avx2_at_runtime_but_uncompiled() _mm256_srli_epi16( \ _mm256_mulhi_epu16(MM256I, _mm256_set1_epi16((short)0x8081)), 7); +#define CACHED_BLIT_SETUP_AVX2 \ + Py_ssize_t j, k, y; \ + Uint32 *srcp; \ + int n_iters_8 = src->w / 8, pxl_excess = src->w % 8; \ + int src_skip = src->pitch / 4 - src->w; \ + int dst_skip = dst->pitch / 4 - src->w; \ + int h, n; \ + \ + __m256i *srcp256 = (__m256i *)src->pixels; \ + __m256i *dstp256; \ + \ + __m256i mm256_src, mm256_dst; \ + __m256i mask = \ + _mm256_set_epi32(0, pxl_excess > 6 ? -1 : 0, pxl_excess > 5 ? -1 : 0, \ + pxl_excess > 4 ? -1 : 0, pxl_excess > 3 ? -1 : 0, \ + pxl_excess > 2 ? -1 : 0, pxl_excess > 1 ? -1 : 0, \ + pxl_excess > 0 ? -1 : 0); \ + \ + Py_ssize_t cache_size = (n_iters_8 + (pxl_excess ? 
1 : 0)) * src->h; \ + __m256i *cache = (__m256i *)malloc(cache_size * sizeof(__m256i)); \ + if (!cache) { \ + return; \ + } \ + \ + /* Load the cache with the source pixels */ \ + for (j = src->h, k = 0; j--;) { \ + for (n = 0; n < n_iters_8; n++, k++, srcp256++) \ + cache[k] = _mm256_loadu_si256(srcp256); \ + \ + if (pxl_excess) \ + cache[k++] = _mm256_maskload_epi32((int *)srcp256, mask); \ + \ + srcp256 = (__m256i *)((Uint32 *)srcp256 + src_skip + pxl_excess); \ + } + #if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ !defined(SDL_DISABLE_IMMINTRIN_H) void @@ -1643,3 +1678,37 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst) } #endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ !defined(SDL_DISABLE_IMMINTRIN_H) */ + +#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ + !defined(SDL_DISABLE_IMMINTRIN_H) +void +_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, + Uint32 **destinations, Py_ssize_t destinations_size) +{ + CACHED_BLIT_SETUP_AVX2; + + /* Blit the cache */ + for (j = 0; j < destinations_size; j++) { + dstp256 = destinations[j]; + for (y = src->h, k = 0; y--;) { + for (n = 0; n < n_iters_8; n++, k++, dstp256++) + _mm256_storeu_si256(dstp256, cache[k]); + + if (pxl_excess) + _mm256_maskstore_epi32((int *)dstp256, mask, cache[k++]); + + dstp256 = (__m256i *)((Uint32 *)dstp256 + dst_skip + pxl_excess); + } + } + + free(cache); +} +#else +void +_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, + Uint32 **destinations, Py_ssize_t destinations_size) +{ + BAD_AVX2_FUNCTION_CALL; +} +#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ +!defined(SDL_DISABLE_IMMINTRIN_H) */ diff --git a/src_c/surface.c b/src_c/surface.c index 3f90293434..e7053c1482 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -262,6 +262,8 @@ static SDL_Surface * pg_DisplayFormat(SDL_Surface *surface); static int _PgSurface_SrcAlpha(SDL_Surface *surf); +int +pg_HasSurfaceRLE(SDL_Surface *surface); static PyGetSetDef 
surface_getsets[] = { {"_pixels_address", (getter)surf_get_pixels_address, NULL, @@ -2150,6 +2152,11 @@ surf_blits(pgSurfaceObject *self, PyObject *args, PyObject *keywds) #define FBLITS_ERR_TUPLE_REQUIRED 11 #define FBLITS_ERR_INCORRECT_ARGS_NUM 12 #define FBLITS_ERR_FLAG_NOT_NUMERIC 13 +#define FBLITS_ERR_CACHE_NOT_NUMERIC 14 +#define FBLITS_ERR_CACHE_NOT_SAMEFMT 15 +#define FBLITS_ERR_CACHE_RLE_NOT_SUPPORTED 16 +#define FBLITS_ERR_FLAG_NOT_SUPPORTED 17 +#define FBLITS_ERR_NO_MEMORY 18 int _surf_fblits_item_check_and_blit(pgSurfaceObject *self, PyObject *item, @@ -2200,6 +2207,117 @@ _surf_fblits_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return 0; } +int +_surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, + int blend_flags, + Uint32 ***destinations, + Py_ssize_t *allocated_size) +{ + PyObject *src_surf, *pos_list; + SDL_Surface *src, *dst = pgSurface_AsSurface(self); + SDL_Surface *subsurface; + int suboffsetx = 0, suboffsety = 0; + SDL_Rect orig_clip, sub_clip; + Py_ssize_t i; + int error = 0; + + /* Check that the item is a tuple of length 2 */ + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + return FBLITS_ERR_TUPLE_REQUIRED; + } + + /* Extract the Surface and sequence of destination objects from the + * (Surface, positions) tuple */ + src_surf = PyTuple_GET_ITEM(item, 0); + pos_list = PyTuple_GET_ITEM(item, 1); + + if (!PyList_Check(pos_list)) { + return BLITS_ERR_SEQUENCE_REQUIRED; + } + + /* Check that the source is a Surface */ + if (!pgSurface_Check(src_surf)) { + return BLITS_ERR_SOURCE_NOT_SURFACE; + } + if (!(src = pgSurface_AsSurface(src_surf))) { + return BLITS_ERR_SEQUENCE_SURF; + } + + /* Check that the source and destination surfaces have the same format */ + if (src->format->format != dst->format->format || + src->format->BytesPerPixel != dst->format->BytesPerPixel || + src->format->BytesPerPixel != 4) { + return FBLITS_ERR_CACHE_NOT_SAMEFMT; + } + + /* rule out RLE */ + if 
(pg_HasSurfaceRLE(src) || pg_HasSurfaceRLE(dst) || + (src->flags & SDL_RLEACCEL) || (dst->flags & SDL_RLEACCEL)) { + return FBLITS_ERR_CACHE_RLE_NOT_SUPPORTED; + } + + /* manage destinations memory allocation or reallocation */ + Py_ssize_t new_size = PyList_GET_SIZE(pos_list); + if (!*destinations) { + *destinations = (Uint32 **)malloc(new_size * sizeof(Uint32 *)); + if (!*destinations) { + return FBLITS_ERR_NO_MEMORY; + } + } + else { + if (new_size > *allocated_size) { + *destinations = + (Uint32 **)realloc(*destinations, new_size * sizeof(Uint32 *)); + + if (!*destinations) + return FBLITS_ERR_NO_MEMORY; + } + } + *allocated_size = new_size; + + if (self->subsurface) { + PyObject *owner; + struct pgSubSurface_Data *subdata; + + subdata = self->subsurface; + owner = subdata->owner; + subsurface = pgSurface_AsSurface(owner); + suboffsetx = subdata->offsetx; + suboffsety = subdata->offsety; + + while (((pgSurfaceObject *)owner)->subsurface) { + subdata = ((pgSurfaceObject *)owner)->subsurface; + owner = subdata->owner; + subsurface = pgSurface_AsSurface(owner); + suboffsetx += subdata->offsetx; + suboffsety += subdata->offsety; + } + + SDL_GetClipRect(subsurface, &orig_clip); + SDL_GetClipRect(dst, &sub_clip); + sub_clip.x += suboffsetx; + sub_clip.y += suboffsety; + SDL_SetClipRect(subsurface, &sub_clip); + dst = subsurface; + } + else { + pgSurface_Prep(self); + subsurface = NULL; + } + pgSurface_Prep((pgSurfaceObject *)src_surf); + + error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations, + *allocated_size, pos_list); + + if (subsurface) + SDL_SetClipRect(subsurface, &orig_clip); + else + pgSurface_Unprep(self); + pgSurface_Unprep((pgSurfaceObject *)src_surf); + + return error; +} + static PyObject * surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) { @@ -2210,13 +2328,16 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) int blend_flags = 0; /* Default flag is 0, opaque */ int error = 0; int 
is_generator = 0; + int cache = 0; + Uint32 **destinations = NULL; + Py_ssize_t destinations_size = 0; - if (nargs == 0 || nargs > 2) { + if (nargs == 0 || nargs > 3) { error = FBLITS_ERR_INCORRECT_ARGS_NUM; goto on_error; } /* Get the blend flags if they are passed */ - else if (nargs == 2) { + else if (nargs >= 2) { if (!PyLong_Check(args[1])) { error = FBLITS_ERR_FLAG_NOT_NUMERIC; goto on_error; @@ -2225,6 +2346,17 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) if (PyErr_Occurred()) { return NULL; } + + if (nargs == 3) { + if (!PyBool_Check(args[2])) { + error = FBLITS_ERR_CACHE_NOT_NUMERIC; + goto on_error; + } + cache = PyObject_IsTrue(args[2]); + if (PyErr_Occurred()) { + return NULL; + } + } } blit_sequence = args[0]; @@ -2235,26 +2367,18 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) PyObject **sequence_items = PySequence_Fast_ITEMS(blit_sequence); for (i = 0; i < PySequence_Fast_GET_SIZE(blit_sequence); i++) { item = sequence_items[i]; - error = _surf_fblits_item_check_and_blit(self, item, blend_flags); - if (error) { - goto on_error; + if (cache) { + error = _surf_fblits_cached_item_check_and_blit( + self, item, blend_flags, &destinations, + &destinations_size); + } + else { + error = + _surf_fblits_item_check_and_blit(self, item, blend_flags); } - } - } - /* Generator path */ - else if (PyIter_Check(blit_sequence)) { - is_generator = 1; - while ((item = PyIter_Next(blit_sequence))) { - error = _surf_fblits_item_check_and_blit(self, item, blend_flags); if (error) { goto on_error; } - Py_DECREF(item); - } - - /* If the generator raises an exception */ - if (PyErr_Occurred()) { - return NULL; } } else { @@ -2262,12 +2386,15 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) goto on_error; } + free(destinations); + Py_RETURN_NONE; on_error: if (is_generator) { Py_XDECREF(item); } + free(destinations); switch (error) { case BLITS_ERR_SEQUENCE_REQUIRED: return RAISE( @@ 
-2300,10 +2427,25 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) case FBLITS_ERR_INCORRECT_ARGS_NUM: return RAISE(PyExc_ValueError, "Incorrect number of parameters passed: need at " - "least one, 2 at max"); + "least one, 3 at max"); case FBLITS_ERR_FLAG_NOT_NUMERIC: return RAISE(PyExc_TypeError, "The special_flags parameter must be an int"); + case FBLITS_ERR_CACHE_NOT_NUMERIC: + return RAISE(PyExc_TypeError, + "The cache parameter must be a bool"); + case FBLITS_ERR_CACHE_NOT_SAMEFMT: + return RAISE(PyExc_TypeError, + "The source surface has wrong format"); + case FBLITS_ERR_CACHE_RLE_NOT_SUPPORTED: + return RAISE(PyExc_TypeError, + "RLE acceleration while caching is not supported"); + case FBLITS_ERR_FLAG_NOT_SUPPORTED: + return RAISE(PyExc_NotImplementedError, + "The flag used or blit mode selected is not " + "supported for this operation"); + case FBLITS_ERR_NO_MEMORY: + return RAISE(PyExc_MemoryError, "No memory available"); } return RAISE(PyExc_TypeError, "Unknown error"); } diff --git a/src_c/surface.h b/src_c/surface.h index 3ae05eb408..5990f57815 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -352,6 +352,15 @@ int pygame_Blit(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, SDL_Rect *dstrect, int blend_flags); +int +SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + Uint32 ***destinations, Py_ssize_t destinations_size, + PyObject *list); + +static void +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, + Py_ssize_t destinations_size); + int premul_surf_color_by_alpha(SDL_Surface *src, SDL_Surface *dst); From 5db5c252a36d86a85b5f8d850533b7ea2ec00ece Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:31:57 +0200 Subject: [PATCH 02/28] fixes --- buildconfig/stubs/pygame/surface.pyi | 4 +++- src_c/surface.c | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git 
a/buildconfig/stubs/pygame/surface.pyi b/buildconfig/stubs/pygame/surface.pyi index 7c551af38d..564207e386 100644 --- a/buildconfig/stubs/pygame/surface.pyi +++ b/buildconfig/stubs/pygame/surface.pyi @@ -81,7 +81,9 @@ class Surface: def fblits( self, blit_sequence: Iterable[Tuple[Surface, Union[Coordinate, RectValue]]], - special_flags: int = 0, / + special_flags: int = 0, + cache: bool = False, + /, ) -> None: ... @overload def convert(self, surface: Surface, /) -> Surface: ... diff --git a/src_c/surface.c b/src_c/surface.c index e7053c1482..ac938c40ed 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2381,6 +2381,29 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) } } } + else if (PyIter_Check(blit_sequence)) { + is_generator = 1; + while ((item = PyIter_Next(blit_sequence))) { + if (cache) { + error = _surf_fblits_cached_item_check_and_blit( + self, item, blend_flags, &destinations, + &destinations_size); + } + else { + error = + _surf_fblits_item_check_and_blit(self, item, blend_flags); + } + if (error) { + goto on_error; + } + Py_DECREF(item); + } + + /* If the generator raises an exception */ + if (PyErr_Occurred()) { + return NULL; + } + } else { error = BLITS_ERR_SEQUENCE_REQUIRED; goto on_error; From 6ff00d6faad8f1c6a411470c3a02e8d10620af50 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:36:57 +0200 Subject: [PATCH 03/28] more fixes and add missing stubs --- docs/reST/ref/surface.rst | 4 +++- src_c/alphablit.c | 7 +++---- src_c/doc/surface_doc.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/reST/ref/surface.rst b/docs/reST/ref/surface.rst index 1862ae1395..c9e7b0ced2 100644 --- a/docs/reST/ref/surface.rst +++ b/docs/reST/ref/surface.rst @@ -197,7 +197,7 @@ .. 
method:: fblits | :sl:`draw many surfaces onto the calling surface at their corresponding location and the same special_flags` - | :sg:`fblits(blit_sequence=((source, dest), ...), special_flags=0, /) -> None` + | :sg:`fblits(blit_sequence=((source, dest), ...), special_flags=0, cache=False, /) -> None` This method takes a sequence of tuples (source, dest) as input, where source is a Surface object and dest is its destination position on this Surface. It draws each source Surface @@ -208,6 +208,8 @@ :param blit_sequence: a sequence of (source, dest) :param special_flags: the flag(s) representing the blend mode used for each surface. See :doc:`special_flags_list` for a list of possible values. + :param cache: a boolean value that determines whether the surface should be cached + for better performance for repeated blitting. :returns: ``None`` diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 087ca706ad..6bb22b8f8a 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -639,7 +639,6 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, /* Everything is okay at the beginning... 
*/ okay = 1; - int error = 0; /* Lock the destination if it's in hardware */ dst_locked = 0; @@ -665,18 +664,18 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, PyObject *tup = list_items[i]; if (!PyTuple_Check(tup) || PyTuple_GET_SIZE(tup) != 2) { - error = -1; + okay = 0; break; } if (!pg_IntFromObj2(PyTuple_GET_ITEM(tup, 0), &x) || !pg_IntFromObj2(PyTuple_GET_ITEM(tup, 1), &y)) { - error = -1; + okay = 0; break; } if (x < 0 || x > dst->w - src->w || y < 0 || y > dst->h - src->h) { - error = -1; + okay = 0; break; } diff --git a/src_c/doc/surface_doc.h b/src_c/doc/surface_doc.h index 358629573f..e85b855d6c 100644 --- a/src_c/doc/surface_doc.h +++ b/src_c/doc/surface_doc.h @@ -2,7 +2,7 @@ #define DOC_SURFACE "Surface((width, height), flags=0, depth=0, masks=None) -> Surface\nSurface((width, height), flags=0, Surface) -> Surface\npygame object for representing images" #define DOC_SURFACE_BLIT "blit(source, dest, area=None, special_flags=0) -> Rect\ndraw another surface onto this one" #define DOC_SURFACE_BLITS "blits(blit_sequence=((source, dest), ...), doreturn=True) -> [Rect, ...] 
or None\nblits(((source, dest, area), ...)) -> [Rect, ...]\nblits(((source, dest, area, special_flags), ...)) -> [Rect, ...]\ndraw many images onto another" -#define DOC_SURFACE_FBLITS "fblits(blit_sequence=((source, dest), ...), special_flags=0, /) -> None\ndraw many surfaces onto the calling surface at their corresponding location and the same special_flags" +#define DOC_SURFACE_FBLITS "fblits(blit_sequence=((source, dest), ...), special_flags=0, cache=False/) -> None\ndraw many surfaces onto the calling surface at their corresponding location and the same special_flags" #define DOC_SURFACE_CONVERT "convert(surface, /) -> Surface\nconvert(depth, flags=0, /) -> Surface\nconvert(masks, flags=0, /) -> Surface\nconvert() -> Surface\nchange the pixel format of an image" #define DOC_SURFACE_CONVERTALPHA "convert_alpha() -> Surface\nchange the pixel format of an image including per pixel alphas" #define DOC_SURFACE_COPY "copy() -> Surface\ncreate a new copy of a Surface" From 69cbaf6719205f06cc1331349db5ad22e93f4a85 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:39:38 +0200 Subject: [PATCH 04/28] removed unused variable --- src_c/surface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src_c/surface.c b/src_c/surface.c index ac938c40ed..6856b25ce2 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2218,7 +2218,6 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, SDL_Surface *subsurface; int suboffsetx = 0, suboffsety = 0; SDL_Rect orig_clip, sub_clip; - Py_ssize_t i; int error = 0; /* Check that the item is a tuple of length 2 */ From 7c107e4abbe18b65e026b076fb8bede585ed0bc8 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:42:26 +0200 Subject: [PATCH 05/28] another fix --- src_c/alphablit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c 
index 6bb22b8f8a..a0e9bf4d65 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -67,7 +67,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, Uint32 ***destinations, Py_ssize_t destinations_size, PyObject *list); -static void +void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, Py_ssize_t destinations_size); @@ -611,7 +611,7 @@ pg_IntFromObj2(PyObject *obj, int *val) return 1; } -static void +void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, Py_ssize_t destinations_size) { From 52c48440c6ee140d0bf438e04cd11a692fccf452 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:45:20 +0200 Subject: [PATCH 06/28] another fix --- src_c/surface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_c/surface.h b/src_c/surface.h index 5990f57815..9cfff53d4d 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -357,7 +357,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, Uint32 ***destinations, Py_ssize_t destinations_size, PyObject *list); -static void +void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, Py_ssize_t destinations_size); From 5508da34235937a898e57a3580f85c3c7cdcdf54 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:55:22 +0200 Subject: [PATCH 07/28] remove unused variables and cast --- src_c/simd_blitters_avx2.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src_c/simd_blitters_avx2.c b/src_c/simd_blitters_avx2.c index 421f092f68..06991c0369 100644 --- a/src_c/simd_blitters_avx2.c +++ b/src_c/simd_blitters_avx2.c @@ -192,16 +192,14 @@ pg_avx2_at_runtime_but_uncompiled() #define CACHED_BLIT_SETUP_AVX2 \ Py_ssize_t j, k, y; \ - Uint32 *srcp; \ int n_iters_8 = src->w / 8, pxl_excess = src->w % 8; \ int src_skip = src->pitch / 4 - src->w; 
\ int dst_skip = dst->pitch / 4 - src->w; \ - int h, n; \ + int n; \ \ __m256i *srcp256 = (__m256i *)src->pixels; \ __m256i *dstp256; \ \ - __m256i mm256_src, mm256_dst; \ __m256i mask = \ _mm256_set_epi32(0, pxl_excess > 6 ? -1 : 0, pxl_excess > 5 ? -1 : 0, \ pxl_excess > 4 ? -1 : 0, pxl_excess > 3 ? -1 : 0, \ @@ -1689,7 +1687,7 @@ _pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, /* Blit the cache */ for (j = 0; j < destinations_size; j++) { - dstp256 = destinations[j]; + dstp256 = (__m256i *)destinations[j]; for (y = src->h, k = 0; y--;) { for (n = 0; n < n_iters_8; n++, k++, dstp256++) _mm256_storeu_si256(dstp256, cache[k]); From 0bc580e88589f41a3f860d5eebca761291a7b4d0 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 20:54:14 +0200 Subject: [PATCH 08/28] moved sequence setup to _surf_fblits_cached_item_check_and_blit --- src_c/alphablit.c | 54 ++--------------------------------------------- src_c/surface.c | 25 +++++++++++++++++++++- src_c/surface.h | 3 +-- 3 files changed, 27 insertions(+), 55 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index a0e9bf4d65..d348e3a6f0 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -64,8 +64,7 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t destinations_size, - PyObject *list); + Uint32 ***destinations, Py_ssize_t destinations_size); void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, @@ -590,27 +589,6 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, return (okay ? 
0 : -1); } -static int -pg_IntFromObj2(PyObject *obj, int *val) -{ - int tmp_val; - - if (PyFloat_Check(obj)) { - double dv = PyFloat_AsDouble(obj); - tmp_val = (int)dv; - } - else { - tmp_val = PyLong_AsLong(obj); - } - - if (tmp_val == -1 && PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - *val = tmp_val; - return 1; -} - void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, Py_ssize_t destinations_size) @@ -627,15 +605,13 @@ pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t destinations_size, - PyObject *list) + Uint32 ***destinations, Py_ssize_t destinations_size) { int okay; int src_locked; int dst_locked; Uint32 colorkey; SDL_BlendMode src_blend; - Py_ssize_t i; /* Everything is okay at the beginning... */ okay = 1; @@ -657,32 +633,6 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, src_locked = 1; } - /* load destinations */ - PyObject **list_items = PySequence_Fast_ITEMS(list); - for (i = 0; i < destinations_size; i++) { - int x, y; - PyObject *tup = list_items[i]; - - if (!PyTuple_Check(tup) || PyTuple_GET_SIZE(tup) != 2) { - okay = 0; - break; - } - - if (!pg_IntFromObj2(PyTuple_GET_ITEM(tup, 0), &x) || - !pg_IntFromObj2(PyTuple_GET_ITEM(tup, 1), &y)) { - okay = 0; - break; - } - - if (x < 0 || x > dst->w - src->w || y < 0 || y > dst->h - src->h) { - okay = 0; - break; - } - - (*destinations)[i] = (Uint32 *)dst->pixels + y * dst->pitch / 4 + x; - } - - /* Set up source and destination buffer pointers, and BLIT! 
*/ if (okay) { switch (blend_flags) { case 0: diff --git a/src_c/surface.c b/src_c/surface.c index 6856b25ce2..67616865be 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2219,6 +2219,7 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, int suboffsetx = 0, suboffsety = 0; SDL_Rect orig_clip, sub_clip; int error = 0; + Py_ssize_t i; /* Check that the item is a tuple of length 2 */ if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { @@ -2274,6 +2275,28 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, } *allocated_size = new_size; + /* load destinations */ + PyObject **list_items = PySequence_Fast_ITEMS(pos_list); + for (i = 0; i < *allocated_size; i++) { + int x, y; + PyObject *tup = list_items[i]; + + if (!PyTuple_Check(tup) || PyTuple_GET_SIZE(tup) != 2) { + return FBLITS_ERR_TUPLE_REQUIRED; + } + + if (!pg_IntFromObj(PyTuple_GET_ITEM(tup, 0), &x) || + !pg_IntFromObj(PyTuple_GET_ITEM(tup, 1), &y)) { + return FBLITS_ERR_INCORRECT_ARGS_NUM; + } + + if (x < 0 || x > dst->w - src->w || y < 0 || y > dst->h - src->h) { + return BLITS_ERR_INVALID_DESTINATION; + } + + (*destinations)[i] = (Uint32 *)dst->pixels + y * dst->pitch / 4 + x; + } + if (self->subsurface) { PyObject *owner; struct pgSubSurface_Data *subdata; @@ -2306,7 +2329,7 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, pgSurface_Prep((pgSurfaceObject *)src_surf); error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations, - *allocated_size, pos_list); + *allocated_size); if (subsurface) SDL_SetClipRect(subsurface, &orig_clip); diff --git a/src_c/surface.h b/src_c/surface.h index 9cfff53d4d..9c65889164 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -354,8 +354,7 @@ pygame_Blit(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t destinations_size, - PyObject *list); + Uint32 
***destinations, Py_ssize_t destinations_size); void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, From 0fba53e9bdaa0879b30c5a62ce6dddc7c97cf975 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 22:28:07 +0200 Subject: [PATCH 09/28] Added SSE2 version --- src_c/alphablit.c | 6 +++ src_c/simd_blitters.h | 4 ++ src_c/simd_blitters_sse2.c | 79 ++++++++++++++++++++++++++++++++++++++ src_c/surface.c | 2 - 4 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index d348e3a6f0..05fea621ec 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -599,6 +599,12 @@ pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, _pg_cached_blitcopy_avx2(src, dst, destinations, destinations_size); return; } +#if PG_ENABLE_SSE_NEON + if (pg_HasSSE_NEON()) { + _pg_cached_blitcopy_sse2(src, dst, destinations, destinations_size); + return; + } +#endif /* PG_ENABLE_SSE_NEON */ #endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */ #endif /* __EMSCRIPTEN__ */ } diff --git a/src_c/simd_blitters.h b/src_c/simd_blitters.h index 376650ca6f..1c480a8fdd 100644 --- a/src_c/simd_blitters.h +++ b/src_c/simd_blitters.h @@ -37,6 +37,10 @@ void blit_blend_rgb_min_sse2(SDL_BlitInfo *info); void blit_blend_premultiplied_sse2(SDL_BlitInfo *info); +void +_pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, + Uint32 **destinations, Py_ssize_t destinations_size); + #endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */ /* Deliberately putting these outside of the preprocessor guards as I want to diff --git a/src_c/simd_blitters_sse2.c b/src_c/simd_blitters_sse2.c index 1c1c881fe7..9c71c2d9a0 100644 --- a/src_c/simd_blitters_sse2.c +++ b/src_c/simd_blitters_sse2.c @@ -139,6 +139,47 @@ pg_neon_at_runtime_but_uncompiled() dstp = (Uint32 *)dstp128 + dstskip; \ } +#define CACHED_BLIT_SETUP_SSE2 \ + Py_ssize_t j, k, y; \ + const int n_iters_4 
= src->w / 4, pxl_excess = src->w % 4; \ + int src_skip = src->pitch / 4 - src->w; \ + int dst_skip = dst->pitch / 4 - src->w; \ + int n; \ + \ + __m128i *srcp128 = (__m128i *)src->pixels; \ + __m128i *dstp128; \ + \ + Py_ssize_t cache_size = (n_iters_4 + (pxl_excess ? 1 : 0)) * src->h; \ + __m128i *cache = (__m128i *)malloc(cache_size * sizeof(__m128i)); \ + if (!cache) { \ + return; \ + } \ + \ + /* Load the cache with the source pixels */ \ + for (j = src->h, k = 0; j--;) { \ + for (n = 0; n < n_iters_4; n++, k++, srcp128++) \ + cache[k] = _mm_loadu_si128(srcp128); \ + \ + if (pxl_excess) { \ + Uint32 *srcp = (Uint32 *)srcp128; \ + __m128i tmp = \ + _mm_loadu_si128((__m128i *)(srcp - 4 + pxl_excess)); \ + switch (pxl_excess) { \ + case 1: \ + cache[k++] = _mm_srli_si128(tmp, 12); \ + break; \ + case 2: \ + cache[k++] = _mm_srli_si128(tmp, 8); \ + break; \ + case 3: \ + cache[k++] = _mm_srli_si128(tmp, 4); \ + break; \ + } \ + } \ + \ + srcp128 = (__m128i *)((Uint32 *)srcp128 + src_skip + pxl_excess); \ + } + #if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON) void alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info) @@ -949,4 +990,42 @@ blit_blend_rgba_max_sse2(SDL_BlitInfo *info) SETUP_SSE2_BLITTER RUN_SSE2_BLITTER({ mm128_dst = _mm_max_epu8(mm128_dst, mm128_src); }) } + +void +_pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, + Uint32 **destinations, Py_ssize_t destinations_size) +{ + CACHED_BLIT_SETUP_SSE2; + + /* Blit the cache */ + for (j = 0; j < destinations_size; j++) { + dstp128 = (__m128i *)destinations[j]; + for (y = src->h, k = 0; y--;) { + for (n = 0; n < n_iters_4; n++, k++, dstp128++) + _mm_storeu_si128(dstp128, cache[k]); + + if (pxl_excess) { + Uint32 *dstp32; + switch (pxl_excess) { + case 1: + dstp32 = (Uint32 *)dstp128; + *dstp32 = cache[k++].m128i_u32[0]; + break; + case 2: + _mm_storel_epi64(dstp128, cache[k++]); + break; + case 3: + _mm_storel_epi64(dstp128, cache[k++]); + dstp32 = (Uint32 *)dstp128; + *dstp32 = 
cache[k++].m128i_u32[2]; + break; + } + } + + dstp128 = (__m128i *)((Uint32 *)dstp128 + dst_skip + pxl_excess); + } + } + + free(cache); +} #endif /* __SSE2__ || PG_ENABLE_ARM_NEON*/ diff --git a/src_c/surface.c b/src_c/surface.c index 67616865be..39889a0492 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -262,8 +262,6 @@ static SDL_Surface * pg_DisplayFormat(SDL_Surface *surface); static int _PgSurface_SrcAlpha(SDL_Surface *surf); -int -pg_HasSurfaceRLE(SDL_Surface *surface); static PyGetSetDef surface_getsets[] = { {"_pixels_address", (getter)surf_get_pixels_address, NULL, From 54dd0fa15509f551bddd758d461d49c2f183e7dc Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 22:49:55 +0200 Subject: [PATCH 10/28] fix --- src_c/surface.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src_c/surface.c b/src_c/surface.c index 39889a0492..67616865be 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -262,6 +262,8 @@ static SDL_Surface * pg_DisplayFormat(SDL_Surface *surface); static int _PgSurface_SrcAlpha(SDL_Surface *surf); +int +pg_HasSurfaceRLE(SDL_Surface *surface); static PyGetSetDef surface_getsets[] = { {"_pixels_address", (getter)surf_get_pixels_address, NULL, From 261472bb45a4e17496d743b04f3e8d4a61ddca79 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 26 Apr 2024 23:02:18 +0200 Subject: [PATCH 11/28] tentative fix --- src_c/simd_blitters_sse2.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src_c/simd_blitters_sse2.c b/src_c/simd_blitters_sse2.c index 9c71c2d9a0..246c725993 100644 --- a/src_c/simd_blitters_sse2.c +++ b/src_c/simd_blitters_sse2.c @@ -1009,7 +1009,11 @@ _pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, switch (pxl_excess) { case 1: dstp32 = (Uint32 *)dstp128; +#if defined(WIN32) && defined(_MSC_VER) *dstp32 = cache[k++].m128i_u32[0]; +#else + *dstp32 = _mm_cvtsi128_si32(cache[k++]); +#endif break; 
case 2: _mm_storel_epi64(dstp128, cache[k++]); @@ -1017,7 +1021,12 @@ _pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, case 3: _mm_storel_epi64(dstp128, cache[k++]); dstp32 = (Uint32 *)dstp128; +#if defined(WIN32) && defined(_MSC_VER) *dstp32 = cache[k++].m128i_u32[2]; +#else + __m128i temp = _mm_srli_si128(cache[k++], 8); + *dstp32 = _mm_cvtsi128_si32(temp); +#endif break; } } From f2fd2e9592e67ac7197be9adf91380763baf0a8f Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:27:54 +0200 Subject: [PATCH 12/28] Massively simplified code for cached blitcopy, removed avx/sse versions in favour of a single implementation with memcpy --- src_c/alphablit.c | 23 +++++----- src_c/simd_blitters.h | 7 --- src_c/simd_blitters_avx2.c | 69 +----------------------------- src_c/simd_blitters_sse2.c | 88 -------------------------------------- 4 files changed, 12 insertions(+), 175 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 05fea621ec..b9238ac11f 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -593,20 +593,17 @@ void pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, Py_ssize_t destinations_size) { -#if !defined(__EMSCRIPTEN__) -#if SDL_BYTEORDER == SDL_LIL_ENDIAN - if (pg_has_avx2()) { - _pg_cached_blitcopy_avx2(src, dst, destinations, destinations_size); - return; - } -#if PG_ENABLE_SSE_NEON - if (pg_HasSSE_NEON()) { - _pg_cached_blitcopy_sse2(src, dst, destinations, destinations_size); - return; + Py_ssize_t i, y; + for (i = 0; i < destinations_size; i++) { + Uint32 *dstp32 = destinations[i]; + Uint32 *srcp32 = (Uint32 *)src->pixels; + y = src->h; + while (y--) { + memcpy(dstp32, srcp32, src->pitch); + srcp32 += src->w; + dstp32 += dst->w; + } } -#endif /* PG_ENABLE_SSE_NEON */ -#endif /* SDL_BYTEORDER == SDL_LIL_ENDIAN */ -#endif /* __EMSCRIPTEN__ */ } int diff --git a/src_c/simd_blitters.h b/src_c/simd_blitters.h index 
1c480a8fdd..906f60fdae 100644 --- a/src_c/simd_blitters.h +++ b/src_c/simd_blitters.h @@ -37,10 +37,6 @@ void blit_blend_rgb_min_sse2(SDL_BlitInfo *info); void blit_blend_premultiplied_sse2(SDL_BlitInfo *info); -void -_pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, - Uint32 **destinations, Py_ssize_t destinations_size); - #endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */ /* Deliberately putting these outside of the preprocessor guards as I want to @@ -87,6 +83,3 @@ void blit_blend_premultiplied_avx2(SDL_BlitInfo *info); void premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst); -void -_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, - Uint32 **destinations, Py_ssize_t destinations_size); diff --git a/src_c/simd_blitters_avx2.c b/src_c/simd_blitters_avx2.c index 06991c0369..f2f0161141 100644 --- a/src_c/simd_blitters_avx2.c +++ b/src_c/simd_blitters_avx2.c @@ -4,6 +4,8 @@ #include #endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */ +#undef __AVX2__ + #define BAD_AVX2_FUNCTION_CALL \ printf( \ "Fatal Error: Attempted calling an AVX2 function when both compile " \ @@ -190,39 +192,6 @@ pg_avx2_at_runtime_but_uncompiled() _mm256_srli_epi16( \ _mm256_mulhi_epu16(MM256I, _mm256_set1_epi16((short)0x8081)), 7); -#define CACHED_BLIT_SETUP_AVX2 \ - Py_ssize_t j, k, y; \ - int n_iters_8 = src->w / 8, pxl_excess = src->w % 8; \ - int src_skip = src->pitch / 4 - src->w; \ - int dst_skip = dst->pitch / 4 - src->w; \ - int n; \ - \ - __m256i *srcp256 = (__m256i *)src->pixels; \ - __m256i *dstp256; \ - \ - __m256i mask = \ - _mm256_set_epi32(0, pxl_excess > 6 ? -1 : 0, pxl_excess > 5 ? -1 : 0, \ - pxl_excess > 4 ? -1 : 0, pxl_excess > 3 ? -1 : 0, \ - pxl_excess > 2 ? -1 : 0, pxl_excess > 1 ? -1 : 0, \ - pxl_excess > 0 ? -1 : 0); \ - \ - Py_ssize_t cache_size = (n_iters_8 + (pxl_excess ? 
1 : 0)) * src->h; \ - __m256i *cache = (__m256i *)malloc(cache_size * sizeof(__m256i)); \ - if (!cache) { \ - return; \ - } \ - \ - /* Load the cache with the source pixels */ \ - for (j = src->h, k = 0; j--;) { \ - for (n = 0; n < n_iters_8; n++, k++, srcp256++) \ - cache[k] = _mm256_loadu_si256(srcp256); \ - \ - if (pxl_excess) \ - cache[k++] = _mm256_maskload_epi32((int *)srcp256, mask); \ - \ - srcp256 = (__m256i *)((Uint32 *)srcp256 + src_skip + pxl_excess); \ - } - #if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ !defined(SDL_DISABLE_IMMINTRIN_H) void @@ -1676,37 +1645,3 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst) } #endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ !defined(SDL_DISABLE_IMMINTRIN_H) */ - -#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ - !defined(SDL_DISABLE_IMMINTRIN_H) -void -_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, - Uint32 **destinations, Py_ssize_t destinations_size) -{ - CACHED_BLIT_SETUP_AVX2; - - /* Blit the cache */ - for (j = 0; j < destinations_size; j++) { - dstp256 = (__m256i *)destinations[j]; - for (y = src->h, k = 0; y--;) { - for (n = 0; n < n_iters_8; n++, k++, dstp256++) - _mm256_storeu_si256(dstp256, cache[k]); - - if (pxl_excess) - _mm256_maskstore_epi32((int *)dstp256, mask, cache[k++]); - - dstp256 = (__m256i *)((Uint32 *)dstp256 + dst_skip + pxl_excess); - } - } - - free(cache); -} -#else -void -_pg_cached_blitcopy_avx2(SDL_Surface *src, SDL_Surface *dst, - Uint32 **destinations, Py_ssize_t destinations_size) -{ - BAD_AVX2_FUNCTION_CALL; -} -#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \ -!defined(SDL_DISABLE_IMMINTRIN_H) */ diff --git a/src_c/simd_blitters_sse2.c b/src_c/simd_blitters_sse2.c index 246c725993..1c1c881fe7 100644 --- a/src_c/simd_blitters_sse2.c +++ b/src_c/simd_blitters_sse2.c @@ -139,47 +139,6 @@ pg_neon_at_runtime_but_uncompiled() dstp = (Uint32 *)dstp128 + dstskip; \ } -#define CACHED_BLIT_SETUP_SSE2 \ - 
Py_ssize_t j, k, y; \ - const int n_iters_4 = src->w / 4, pxl_excess = src->w % 4; \ - int src_skip = src->pitch / 4 - src->w; \ - int dst_skip = dst->pitch / 4 - src->w; \ - int n; \ - \ - __m128i *srcp128 = (__m128i *)src->pixels; \ - __m128i *dstp128; \ - \ - Py_ssize_t cache_size = (n_iters_4 + (pxl_excess ? 1 : 0)) * src->h; \ - __m128i *cache = (__m128i *)malloc(cache_size * sizeof(__m128i)); \ - if (!cache) { \ - return; \ - } \ - \ - /* Load the cache with the source pixels */ \ - for (j = src->h, k = 0; j--;) { \ - for (n = 0; n < n_iters_4; n++, k++, srcp128++) \ - cache[k] = _mm_loadu_si128(srcp128); \ - \ - if (pxl_excess) { \ - Uint32 *srcp = (Uint32 *)srcp128; \ - __m128i tmp = \ - _mm_loadu_si128((__m128i *)(srcp - 4 + pxl_excess)); \ - switch (pxl_excess) { \ - case 1: \ - cache[k++] = _mm_srli_si128(tmp, 12); \ - break; \ - case 2: \ - cache[k++] = _mm_srli_si128(tmp, 8); \ - break; \ - case 3: \ - cache[k++] = _mm_srli_si128(tmp, 4); \ - break; \ - } \ - } \ - \ - srcp128 = (__m128i *)((Uint32 *)srcp128 + src_skip + pxl_excess); \ - } - #if defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON) void alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info) @@ -990,51 +949,4 @@ blit_blend_rgba_max_sse2(SDL_BlitInfo *info) SETUP_SSE2_BLITTER RUN_SSE2_BLITTER({ mm128_dst = _mm_max_epu8(mm128_dst, mm128_src); }) } - -void -_pg_cached_blitcopy_sse2(SDL_Surface *src, SDL_Surface *dst, - Uint32 **destinations, Py_ssize_t destinations_size) -{ - CACHED_BLIT_SETUP_SSE2; - - /* Blit the cache */ - for (j = 0; j < destinations_size; j++) { - dstp128 = (__m128i *)destinations[j]; - for (y = src->h, k = 0; y--;) { - for (n = 0; n < n_iters_4; n++, k++, dstp128++) - _mm_storeu_si128(dstp128, cache[k]); - - if (pxl_excess) { - Uint32 *dstp32; - switch (pxl_excess) { - case 1: - dstp32 = (Uint32 *)dstp128; -#if defined(WIN32) && defined(_MSC_VER) - *dstp32 = cache[k++].m128i_u32[0]; -#else - *dstp32 = _mm_cvtsi128_si32(cache[k++]); -#endif - break; - case 2: - 
_mm_storel_epi64(dstp128, cache[k++]); - break; - case 3: - _mm_storel_epi64(dstp128, cache[k++]); - dstp32 = (Uint32 *)dstp128; -#if defined(WIN32) && defined(_MSC_VER) - *dstp32 = cache[k++].m128i_u32[2]; -#else - __m128i temp = _mm_srli_si128(cache[k++], 8); - *dstp32 = _mm_cvtsi128_si32(temp); -#endif - break; - } - } - - dstp128 = (__m128i *)((Uint32 *)dstp128 + dst_skip + pxl_excess); - } - } - - free(cache); -} #endif /* __SSE2__ || PG_ENABLE_ARM_NEON*/ From a6ddfef7d9a76ed84ee36fd98d345e4ce92568b6 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:28:38 +0200 Subject: [PATCH 13/28] forgot about that --- src_c/simd_blitters_avx2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src_c/simd_blitters_avx2.c b/src_c/simd_blitters_avx2.c index f2f0161141..d113b2b87e 100644 --- a/src_c/simd_blitters_avx2.c +++ b/src_c/simd_blitters_avx2.c @@ -4,8 +4,6 @@ #include #endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */ -#undef __AVX2__ - #define BAD_AVX2_FUNCTION_CALL \ printf( \ "Fatal Error: Attempted calling an AVX2 function when both compile " \ From 41b68adb9168456376c9f444b6dd6d2084e2a2e8 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Tue, 30 Apr 2024 16:03:20 +0200 Subject: [PATCH 14/28] Can now partially blit surfaces onto the destination. 
--- src_c/alphablit.c | 38 ++++++++++++---------- src_c/surface.c | 81 +++++++++++++++++++++++++++++++---------------- src_c/surface.h | 17 ++++++++-- 3 files changed, 90 insertions(+), 46 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index b9238ac11f..8326ee2390 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -64,11 +64,11 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t destinations_size); + BlitSequence *destinations); void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, - Py_ssize_t destinations_size); +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations); extern int SDL_RLESurface(SDL_Surface *surface); @@ -590,25 +590,31 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, } void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, - Py_ssize_t destinations_size) +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations) { - Py_ssize_t i, y; - for (i = 0; i < destinations_size; i++) { - Uint32 *dstp32 = destinations[i]; - Uint32 *srcp32 = (Uint32 *)src->pixels; - y = src->h; - while (y--) { - memcpy(dstp32, srcp32, src->pitch); - srcp32 += src->w; - dstp32 += dst->w; + Py_ssize_t i; + for (i = 0; i < destinations->size; i++) { + CachedBlitDest *item = &destinations->sequence[i]; + + const Py_ssize_t src_pitch = item->w * sizeof(Uint32); + const Py_ssize_t src_skip = src->pitch / 4; + const Py_ssize_t dst_skip = dst->pitch / 4; + + Uint32 *srcp32 = (Uint32 *)src->pixels + item->x + item->y * src_skip; + Uint32 *dstp32 = item->pixels; + + while (item->h--) { + memcpy(dstp32, srcp32, src_pitch); + srcp32 += src_skip; + dstp32 += dst_skip; } } } int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t 
destinations_size) + BlitSequence *destinations) { int okay; int src_locked; @@ -648,7 +654,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, } /* blitcopy */ - pg_cached_blitcopy(src, dst, *destinations, destinations_size); + pg_cached_blitcopy(src, dst, destinations); break; default: okay = 0; diff --git a/src_c/surface.c b/src_c/surface.c index 67616865be..e87382a6aa 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2210,8 +2210,7 @@ _surf_fblits_item_check_and_blit(pgSurfaceObject *self, PyObject *item, int _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, - Uint32 ***destinations, - Py_ssize_t *allocated_size) + BlitSequence *destinations) { PyObject *src_surf, *pos_list; SDL_Surface *src, *dst = pgSurface_AsSurface(self); @@ -2258,26 +2257,34 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, /* manage destinations memory allocation or reallocation */ Py_ssize_t new_size = PyList_GET_SIZE(pos_list); - if (!*destinations) { - *destinations = (Uint32 **)malloc(new_size * sizeof(Uint32 *)); - if (!*destinations) { + if (destinations->sequence == NULL) { + destinations->sequence = + (CachedBlitDest *)malloc(new_size * sizeof(CachedBlitDest)); + destinations->size = destinations->alloc_size = new_size; + if (!destinations->sequence) { return FBLITS_ERR_NO_MEMORY; } } - else { - if (new_size > *allocated_size) { - *destinations = - (Uint32 **)realloc(*destinations, new_size * sizeof(Uint32 *)); + else if (new_size > 0 && new_size <= destinations->alloc_size) { + destinations->size = new_size; + } + else if (new_size > destinations->alloc_size) { + destinations->sequence = (CachedBlitDest *)realloc( + destinations->sequence, new_size * sizeof(CachedBlitDest)); - if (!*destinations) - return FBLITS_ERR_NO_MEMORY; - } + if (!destinations->sequence) + return FBLITS_ERR_NO_MEMORY; + + destinations->size = destinations->alloc_size = new_size; + } + else { + 
return FBLITS_ERR_INCORRECT_ARGS_NUM; } - *allocated_size = new_size; /* load destinations */ PyObject **list_items = PySequence_Fast_ITEMS(pos_list); - for (i = 0; i < *allocated_size; i++) { + Py_ssize_t current_size = 0; + for (i = 0; i < destinations->size; i++) { int x, y; PyObject *tup = list_items[i]; @@ -2290,13 +2297,36 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return FBLITS_ERR_INCORRECT_ARGS_NUM; } - if (x < 0 || x > dst->w - src->w || y < 0 || y > dst->h - src->h) { - return BLITS_ERR_INVALID_DESTINATION; + if (x < -src->w || x > dst->w || y < -src->h || y > dst->h) + continue; /* Skip out of bounds destinations */ + + CachedBlitDest *blit_struct = &destinations->sequence[current_size++]; + blit_struct->pixels = (Uint32 *)dst->pixels; + + if (x < 0) { + blit_struct->w = src->w + x; + blit_struct->x = -x; + } + else { + blit_struct->pixels += x; + blit_struct->w = x > dst->w - src->w ? dst->w - x : src->w; + blit_struct->x = 0; } - (*destinations)[i] = (Uint32 *)dst->pixels + y * dst->pitch / 4 + x; + if (y < 0) { + blit_struct->h = src->h + y; + blit_struct->y = -y; + } + else { + blit_struct->pixels += y * dst->pitch / 4; + blit_struct->h = y > dst->h - src->h ? 
dst->h - y : src->h; + blit_struct->y = 0; + } } + if (!(destinations->size = current_size)) + return 0; + if (self->subsurface) { PyObject *owner; struct pgSubSurface_Data *subdata; @@ -2326,10 +2356,10 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, pgSurface_Prep(self); subsurface = NULL; } + pgSurface_Prep((pgSurfaceObject *)src_surf); - error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations, - *allocated_size); + error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations); if (subsurface) SDL_SetClipRect(subsurface, &orig_clip); @@ -2351,8 +2381,7 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) int error = 0; int is_generator = 0; int cache = 0; - Uint32 **destinations = NULL; - Py_ssize_t destinations_size = 0; + BlitSequence destinations = {NULL, 0, 0}; if (nargs == 0 || nargs > 3) { error = FBLITS_ERR_INCORRECT_ARGS_NUM; @@ -2391,8 +2420,7 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) item = sequence_items[i]; if (cache) { error = _surf_fblits_cached_item_check_and_blit( - self, item, blend_flags, &destinations, - &destinations_size); + self, item, blend_flags, &destinations); } else { error = @@ -2408,8 +2436,7 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) while ((item = PyIter_Next(blit_sequence))) { if (cache) { error = _surf_fblits_cached_item_check_and_blit( - self, item, blend_flags, &destinations, - &destinations_size); + self, item, blend_flags, &destinations); } else { error = @@ -2431,7 +2458,7 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) goto on_error; } - free(destinations); + free(destinations.sequence); Py_RETURN_NONE; @@ -2439,7 +2466,7 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) if (is_generator) { Py_XDECREF(item); } - free(destinations); + free(destinations.sequence); switch (error) { case BLITS_ERR_SEQUENCE_REQUIRED: return 
RAISE( diff --git a/src_c/surface.h b/src_c/surface.h index 9c65889164..3bb32dbd65 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -337,6 +337,17 @@ } while (0) #endif +typedef struct { + Uint32 *pixels; + int w, h, x, y; +} CachedBlitDest; + +typedef struct { + CachedBlitDest *sequence; + Py_ssize_t alloc_size; + Py_ssize_t size; +} BlitSequence; + int surface_fill_blend(SDL_Surface *surface, SDL_Rect *rect, Uint32 color, int blendargs); @@ -354,11 +365,11 @@ pygame_Blit(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - Uint32 ***destinations, Py_ssize_t destinations_size); + BlitSequence *destinations); void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, Uint32 **destinations, - Py_ssize_t destinations_size); +pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations); int premul_surf_color_by_alpha(SDL_Surface *src, SDL_Surface *dst); From f24c4eb87196aacd1dd519dafc555a8ca7bcc32f Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 3 May 2024 13:34:58 +0200 Subject: [PATCH 15/28] use SDL_HasColorKey --- src_c/alphablit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 8326ee2390..166421a5ff 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -648,7 +648,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, /* unhandled cases */ if (SDL_GetSurfaceBlendMode(src, &src_blend) != 0 || (src_blend == SDL_BLENDMODE_NONE && src->format->Amask) || - SDL_GetColorKey(src, &colorkey) == 0) { + SDL_HasColorKey(src)) { okay = 0; break; } From 701be3552922dcba2f781bc18d02a8ee575bea8a Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 3 May 2024 13:39:51 +0200 Subject: [PATCH 16/28] remove unused variable --- src_c/alphablit.c | 1 - 1 file changed, 1 deletion(-) 
diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 166421a5ff..1c9346632e 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -619,7 +619,6 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, int okay; int src_locked; int dst_locked; - Uint32 colorkey; SDL_BlendMode src_blend; /* Everything is okay at the beginning... */ From 57e2aaa13c4c875e1044884079838fb06ef990dc Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 3 May 2024 15:19:43 +0200 Subject: [PATCH 17/28] cleanup, always using realloc now, added proper error messages. --- src_c/alphablit.c | 25 ++++++++----------------- src_c/surface.c | 36 +++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 1c9346632e..d1e0a799f8 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -597,9 +597,9 @@ pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, for (i = 0; i < destinations->size; i++) { CachedBlitDest *item = &destinations->sequence[i]; - const Py_ssize_t src_pitch = item->w * sizeof(Uint32); - const Py_ssize_t src_skip = src->pitch / 4; - const Py_ssize_t dst_skip = dst->pitch / 4; + const int src_pitch = item->w * sizeof(Uint32); + const int src_skip = src->pitch / 4; + const int dst_skip = dst->pitch / 4; Uint32 *srcp32 = (Uint32 *)src->pixels + item->x + item->y * src_skip; Uint32 *dstp32 = item->pixels; @@ -616,24 +616,15 @@ int SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, BlitSequence *destinations) { - int okay; - int src_locked; - int dst_locked; - SDL_BlendMode src_blend; + int okay = 1; + int src_locked = 0, dst_locked = 0; - /* Everything is okay at the beginning... 
*/ - okay = 1; - - /* Lock the destination if it's in hardware */ - dst_locked = 0; if (SDL_MUSTLOCK(dst)) { if (SDL_LockSurface(dst) < 0) okay = 0; else dst_locked = 1; } - /* Lock the source if it's in hardware */ - src_locked = 0; if (SDL_MUSTLOCK(src)) { if (SDL_LockSurface(src) < 0) okay = 0; @@ -642,6 +633,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, } if (okay) { + SDL_BlendMode src_blend; switch (blend_flags) { case 0: /* unhandled cases */ @@ -661,13 +653,12 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, } } - /* We need to unlock the surfaces if they're locked */ if (dst_locked) SDL_UnlockSurface(dst); if (src_locked) SDL_UnlockSurface(src); - /* Blit is done! */ - return (okay ? 0 : -1); + + return okay ? 0 : -1; } /* --------------------------------------------------------- */ diff --git a/src_c/surface.c b/src_c/surface.c index e87382a6aa..4bc23ccd35 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2157,6 +2157,8 @@ surf_blits(pgSurfaceObject *self, PyObject *args, PyObject *keywds) #define FBLITS_ERR_CACHE_RLE_NOT_SUPPORTED 16 #define FBLITS_ERR_FLAG_NOT_SUPPORTED 17 #define FBLITS_ERR_NO_MEMORY 18 +#define FBLITS_ERR_INVALID_SEQUENCE_LENGTH 19 +#define FBLITS_ERR_INVALID_DESTINATION 20 int _surf_fblits_item_check_and_blit(pgSurfaceObject *self, PyObject *item, @@ -2255,20 +2257,10 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return FBLITS_ERR_CACHE_RLE_NOT_SUPPORTED; } - /* manage destinations memory allocation or reallocation */ + /* manage destinations memory */ Py_ssize_t new_size = PyList_GET_SIZE(pos_list); - if (destinations->sequence == NULL) { - destinations->sequence = - (CachedBlitDest *)malloc(new_size * sizeof(CachedBlitDest)); - destinations->size = destinations->alloc_size = new_size; - if (!destinations->sequence) { - return FBLITS_ERR_NO_MEMORY; - } - } - else if (new_size > 0 && new_size <= destinations->alloc_size) { - 
destinations->size = new_size; - } - else if (new_size > destinations->alloc_size) { + if (new_size > destinations->alloc_size) { + /* no realloc as we don't care about the previous destinations */ destinations->sequence = (CachedBlitDest *)realloc( destinations->sequence, new_size * sizeof(CachedBlitDest)); @@ -2277,8 +2269,13 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, destinations->size = destinations->alloc_size = new_size; } + else if (new_size > 0 && new_size <= destinations->alloc_size) { + destinations->size = new_size; + } else { - return FBLITS_ERR_INCORRECT_ARGS_NUM; + if (new_size == 0) + return 0; + return FBLITS_ERR_INVALID_SEQUENCE_LENGTH; } /* load destinations */ @@ -2294,7 +2291,7 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, if (!pg_IntFromObj(PyTuple_GET_ITEM(tup, 0), &x) || !pg_IntFromObj(PyTuple_GET_ITEM(tup, 1), &y)) { - return FBLITS_ERR_INCORRECT_ARGS_NUM; + return FBLITS_ERR_INVALID_DESTINATION; } if (x < -src->w || x > dst->w || y < -src->h || y > dst->h) @@ -2367,6 +2364,9 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, pgSurface_Unprep(self); pgSurface_Unprep((pgSurfaceObject *)src_surf); + if (error == -1) + error = BLITS_ERR_BLIT_FAIL; + return error; } @@ -2518,6 +2518,12 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) "supported for this operation"); case FBLITS_ERR_NO_MEMORY: return RAISE(PyExc_MemoryError, "No memory available"); + case FBLITS_ERR_INVALID_SEQUENCE_LENGTH: + return RAISE(PyExc_ValueError, + "Invalid sequence length for cached blit"); + case FBLITS_ERR_INVALID_DESTINATION: + return RAISE(PyExc_TypeError, + "Invalid destination position for cached blit"); } return RAISE(PyExc_TypeError, "Unknown error"); } From eada109620939fd0ecdd2290d22293fb7b8470f5 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Fri, 3 May 2024 16:19:34 +0200 
Subject: [PATCH 18/28] function now respects the destination's clip rect --- src_c/surface.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/src_c/surface.c b/src_c/surface.c index 4bc23ccd35..6cbe399790 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2260,7 +2260,6 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, /* manage destinations memory */ Py_ssize_t new_size = PyList_GET_SIZE(pos_list); if (new_size > destinations->alloc_size) { - /* no realloc as we don't care about the previous destinations */ destinations->sequence = (CachedBlitDest *)realloc( destinations->sequence, new_size * sizeof(CachedBlitDest)); @@ -2294,31 +2293,20 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return FBLITS_ERR_INVALID_DESTINATION; } - if (x < -src->w || x > dst->w || y < -src->h || y > dst->h) + SDL_Rect *clip_rect = &dst->clip_rect; + SDL_Rect clipped; + if (!SDL_IntersectRect(clip_rect, &(SDL_Rect){x, y, src->w, src->h}, + &clipped)) continue; /* Skip out of bounds destinations */ CachedBlitDest *blit_struct = &destinations->sequence[current_size++]; - blit_struct->pixels = (Uint32 *)dst->pixels; - if (x < 0) { - blit_struct->w = src->w + x; - blit_struct->x = -x; - } - else { - blit_struct->pixels += x; - blit_struct->w = x > dst->w - src->w ? dst->w - x : src->w; - blit_struct->x = 0; - } - - if (y < 0) { - blit_struct->h = src->h + y; - blit_struct->y = -y; - } - else { - blit_struct->pixels += y * dst->pitch / 4; - blit_struct->h = y > dst->h - src->h ? dst->h - y : src->h; - blit_struct->y = 0; - } + blit_struct->pixels = + (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; + blit_struct->w = clipped.w; + blit_struct->h = clipped.h; + blit_struct->x = x < clip_rect->x ? clip_rect->x - x : 0; + blit_struct->y = y < clip_rect->y ? 
clip_rect->y - y : 0; } if (!(destinations->size = current_size)) From 065b3f8f0978741de0f50e08cfd915ca129e0991 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Tue, 7 May 2024 14:27:15 +0200 Subject: [PATCH 19/28] better support subsurfaces --- src_c/surface.c | 75 +++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/src_c/surface.c b/src_c/surface.c index 6cbe399790..a3456a8e49 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2277,39 +2277,7 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return FBLITS_ERR_INVALID_SEQUENCE_LENGTH; } - /* load destinations */ - PyObject **list_items = PySequence_Fast_ITEMS(pos_list); - Py_ssize_t current_size = 0; - for (i = 0; i < destinations->size; i++) { - int x, y; - PyObject *tup = list_items[i]; - - if (!PyTuple_Check(tup) || PyTuple_GET_SIZE(tup) != 2) { - return FBLITS_ERR_TUPLE_REQUIRED; - } - - if (!pg_IntFromObj(PyTuple_GET_ITEM(tup, 0), &x) || - !pg_IntFromObj(PyTuple_GET_ITEM(tup, 1), &y)) { - return FBLITS_ERR_INVALID_DESTINATION; - } - - SDL_Rect *clip_rect = &dst->clip_rect; - SDL_Rect clipped; - if (!SDL_IntersectRect(clip_rect, &(SDL_Rect){x, y, src->w, src->h}, - &clipped)) - continue; /* Skip out of bounds destinations */ - - CachedBlitDest *blit_struct = &destinations->sequence[current_size++]; - - blit_struct->pixels = - (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; - blit_struct->w = clipped.w; - blit_struct->h = clipped.h; - blit_struct->x = x < clip_rect->x ? clip_rect->x - x : 0; - blit_struct->y = y < clip_rect->y ? 
clip_rect->y - y : 0; - } - - if (!(destinations->size = current_size)) + if (destinations->size == 0) return 0; if (self->subsurface) { @@ -2342,6 +2310,47 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, subsurface = NULL; } + /* load destinations */ + PyObject **list_items = PySequence_Fast_ITEMS(pos_list); + Py_ssize_t current_size = 0; + SDL_Rect src_dest = {0, 0, src->w, src->h}; + SDL_Rect temp, *argrect; + for (i = 0; i < destinations->size; i++) { + PyObject *item = list_items[i]; + + if (pg_TwoIntsFromObj(item, &src_dest.x, &src_dest.y)) { + } + else if ((argrect = pgRect_FromObject(item, &temp))) { + src_dest.x = argrect->x; + src_dest.y = argrect->y; + } + else { + return FBLITS_ERR_INVALID_DESTINATION; + } + + SDL_Rect *clip_rect = &dst->clip_rect; + SDL_Rect clipped; + if (!SDL_IntersectRect(clip_rect, &src_dest, &clipped)) + continue; /* Skip out of bounds destinations */ + + CachedBlitDest *d_item = &destinations->sequence[current_size++]; + + d_item->pixels = (Uint32 *)dst->pixels; + d_item->pixels += clipped.y * dst->pitch / 4 + clipped.x; + d_item->w = clipped.w; + d_item->h = clipped.h; + d_item->x = src_dest.x < clip_rect->x ? clip_rect->x - src_dest.x : 0; + d_item->y = src_dest.y < clip_rect->y ? 
clip_rect->y - src_dest.y : 0; + } + + if (!(destinations->size = current_size)) { + if (subsurface) + SDL_SetClipRect(subsurface, &orig_clip); + else + pgSurface_Unprep(self); + return 0; + } + pgSurface_Prep((pgSurfaceObject *)src_surf); error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations); From 2d2a1c08700c37e6b00fbde4484f4e220b3cd61a Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 11 May 2024 15:15:30 +0200 Subject: [PATCH 20/28] now correctly draws onto subsurfaces --- src_c/surface.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src_c/surface.c b/src_c/surface.c index a3456a8e49..57312868ba 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2328,6 +2328,9 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return FBLITS_ERR_INVALID_DESTINATION; } + src_dest.x += suboffsetx; + src_dest.y += suboffsety; + SDL_Rect *clip_rect = &dst->clip_rect; SDL_Rect clipped; if (!SDL_IntersectRect(clip_rect, &src_dest, &clipped)) @@ -2335,10 +2338,12 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, CachedBlitDest *d_item = &destinations->sequence[current_size++]; - d_item->pixels = (Uint32 *)dst->pixels; - d_item->pixels += clipped.y * dst->pitch / 4 + clipped.x; + d_item->pixels = + (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; + d_item->w = clipped.w; d_item->h = clipped.h; + d_item->x = src_dest.x < clip_rect->x ? clip_rect->x - src_dest.x : 0; d_item->y = src_dest.y < clip_rect->y ? 
clip_rect->y - src_dest.y : 0; } From f91a0804f44a89785e56022fcc3554cb9fe207ad Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 11 May 2024 16:39:32 +0200 Subject: [PATCH 21/28] removed "cache" parameter --- buildconfig/stubs/pygame/surface.pyi | 8 +- docs/reST/ref/surface.rst | 5 +- src_c/doc/surface_doc.h | 2 +- src_c/surface.c | 196 +++++++++++++-------------- 4 files changed, 100 insertions(+), 111 deletions(-) diff --git a/buildconfig/stubs/pygame/surface.pyi b/buildconfig/stubs/pygame/surface.pyi index 564207e386..85f24f0194 100644 --- a/buildconfig/stubs/pygame/surface.pyi +++ b/buildconfig/stubs/pygame/surface.pyi @@ -80,9 +80,13 @@ class Surface: ) -> Union[List[Rect], None]: ... def fblits( self, - blit_sequence: Iterable[Tuple[Surface, Union[Coordinate, RectValue]]], + blit_sequence: Iterable[ + Union[ + Tuple[Surface, Union[Coordinate, RectValue]], + Tuple[Surface, Sequence[Union[Coordinate, RectValue]]], + ] + ], special_flags: int = 0, - cache: bool = False, /, ) -> None: ... @overload diff --git a/docs/reST/ref/surface.rst b/docs/reST/ref/surface.rst index c9e7b0ced2..08d4c357a3 100644 --- a/docs/reST/ref/surface.rst +++ b/docs/reST/ref/surface.rst @@ -197,7 +197,8 @@ .. method:: fblits | :sl:`draw many surfaces onto the calling surface at their corresponding location and the same special_flags` - | :sg:`fblits(blit_sequence=((source, dest), ...), special_flags=0, cache=False/) -> None` + | :sg:`fblits(blit_sequence=((source, dest), ...), special_flags=0/) -> None` + | :sg:`fblits(blit_sequence=((source, [dest1, dest2, ...]), ...), special_flags=0/) -> None` This method takes a sequence of tuples (source, dest) as input, where source is a Surface object and dest is its destination position on this Surface. It draws each source Surface @@ -208,8 +209,6 @@ :param blit_sequence: a sequence of (source, dest) :param special_flags: the flag(s) representing the blend mode used for each surface. 
See :doc:`special_flags_list` for a list of possible values. - :param cache: a boolean value that determines whether the surface should be cached - for better performance for repeated blitting. :returns: ``None`` diff --git a/src_c/doc/surface_doc.h b/src_c/doc/surface_doc.h index e85b855d6c..60e7fdd0b7 100644 --- a/src_c/doc/surface_doc.h +++ b/src_c/doc/surface_doc.h @@ -2,7 +2,7 @@ #define DOC_SURFACE "Surface((width, height), flags=0, depth=0, masks=None) -> Surface\nSurface((width, height), flags=0, Surface) -> Surface\npygame object for representing images" #define DOC_SURFACE_BLIT "blit(source, dest, area=None, special_flags=0) -> Rect\ndraw another surface onto this one" #define DOC_SURFACE_BLITS "blits(blit_sequence=((source, dest), ...), doreturn=True) -> [Rect, ...] or None\nblits(((source, dest, area), ...)) -> [Rect, ...]\nblits(((source, dest, area, special_flags), ...)) -> [Rect, ...]\ndraw many images onto another" -#define DOC_SURFACE_FBLITS "fblits(blit_sequence=((source, dest), ...), special_flags=0, cache=False/) -> None\ndraw many surfaces onto the calling surface at their corresponding location and the same special_flags" +#define DOC_SURFACE_FBLITS "fblits(blit_sequence=((source, dest), ...), special_flags=0/) -> None\nfblits(blit_sequence=((source, [dest1, dest2, ...]), ...), special_flags=0/) -> None\ndraw many surfaces onto the calling surface at their corresponding location and the same special_flags" #define DOC_SURFACE_CONVERT "convert(surface, /) -> Surface\nconvert(depth, flags=0, /) -> Surface\nconvert(masks, flags=0, /) -> Surface\nconvert() -> Surface\nchange the pixel format of an image" #define DOC_SURFACE_CONVERTALPHA "convert_alpha() -> Surface\nchange the pixel format of an image including per pixel alphas" #define DOC_SURFACE_COPY "copy() -> Surface\ncreate a new copy of a Surface" diff --git a/src_c/surface.c b/src_c/surface.c index 57312868ba..4f4a3a825b 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2159,91 
+2159,36 @@ surf_blits(pgSurfaceObject *self, PyObject *args, PyObject *keywds) #define FBLITS_ERR_NO_MEMORY 18 #define FBLITS_ERR_INVALID_SEQUENCE_LENGTH 19 #define FBLITS_ERR_INVALID_DESTINATION 20 +#define FBLITS_ERR_INVALID_SEQUENCE 21 int -_surf_fblits_item_check_and_blit(pgSurfaceObject *self, PyObject *item, +_surf_fblits_item_check_and_blit(PyObject *src_surf, SDL_Surface *src, + pgSurfaceObject *dest, int x, int y, int blend_flags) { - PyObject *src_surf, *blit_pos; - SDL_Surface *src; - SDL_Rect *src_rect, temp, dest_rect; - - /* Check that the item is a tuple of length 2 */ - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - return FBLITS_ERR_TUPLE_REQUIRED; - } - - /* Extract the Surface and destination objects from the - * (Surface, dest) tuple */ - src_surf = PyTuple_GET_ITEM(item, 0); - blit_pos = PyTuple_GET_ITEM(item, 1); - - /* Check that the source is a Surface */ - if (!pgSurface_Check(src_surf)) { - return BLITS_ERR_SOURCE_NOT_SURFACE; - } - if (!(src = pgSurface_AsSurface(src_surf))) { - return BLITS_ERR_SEQUENCE_SURF; - } + SDL_Rect dest_rect = {x, y, src->w, src->h}; - /* Try to extract a valid blit position */ - if (pg_TwoIntsFromObj(blit_pos, &dest_rect.x, &dest_rect.y)) { - } - else if ((src_rect = pgRect_FromObject(blit_pos, &temp))) { - dest_rect.x = src_rect->x; - dest_rect.y = src_rect->y; - } - else { - return BLITS_ERR_INVALID_DESTINATION; - } - - dest_rect.w = src->w; - dest_rect.h = src->h; - - /* Perform the blit */ - if (pgSurface_Blit(self, (pgSurfaceObject *)src_surf, &dest_rect, NULL, - blend_flags)) { + if (pgSurface_Blit(dest, (pgSurfaceObject *)src_surf, &dest_rect, NULL, + blend_flags)) return BLITS_ERR_BLIT_FAIL; - } return 0; } int -_surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, +_surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, + SDL_Surface *src, SDL_Surface *dst, + PyObject *pos_sequence, int blend_flags, BlitSequence *destinations) { - PyObject *src_surf, 
*pos_list; - SDL_Surface *src, *dst = pgSurface_AsSurface(self); + PyObject *src_surf; SDL_Surface *subsurface; int suboffsetx = 0, suboffsety = 0; SDL_Rect orig_clip, sub_clip; int error = 0; Py_ssize_t i; - /* Check that the item is a tuple of length 2 */ - if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { - return FBLITS_ERR_TUPLE_REQUIRED; - } - - /* Extract the Surface and sequence of destination objects from the - * (Surface, positions) tuple */ - src_surf = PyTuple_GET_ITEM(item, 0); - pos_list = PyTuple_GET_ITEM(item, 1); - - if (!PyList_Check(pos_list)) { - return BLITS_ERR_SEQUENCE_REQUIRED; - } - - /* Check that the source is a Surface */ - if (!pgSurface_Check(src_surf)) { - return BLITS_ERR_SOURCE_NOT_SURFACE; - } - if (!(src = pgSurface_AsSurface(src_surf))) { - return BLITS_ERR_SEQUENCE_SURF; - } - /* Check that the source and destination surfaces have the same format */ if (src->format->format != dst->format->format || src->format->BytesPerPixel != dst->format->BytesPerPixel || @@ -2258,7 +2203,7 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, } /* manage destinations memory */ - Py_ssize_t new_size = PyList_GET_SIZE(pos_list); + Py_ssize_t new_size = PySequence_Fast_GET_SIZE(pos_sequence); if (new_size > destinations->alloc_size) { destinations->sequence = (CachedBlitDest *)realloc( destinations->sequence, new_size * sizeof(CachedBlitDest)); @@ -2311,12 +2256,12 @@ _surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, } /* load destinations */ - PyObject **list_items = PySequence_Fast_ITEMS(pos_list); + PyObject **seq_items = PySequence_Fast_ITEMS(pos_sequence); Py_ssize_t current_size = 0; SDL_Rect src_dest = {0, 0, src->w, src->h}; SDL_Rect temp, *argrect; for (i = 0; i < destinations->size; i++) { - PyObject *item = list_items[i]; + PyObject *item = seq_items[i]; if (pg_TwoIntsFromObj(item, &src_dest.x, &src_dest.y)) { } @@ -2372,6 +2317,70 @@ 
_surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, PyObject *item, return error; } +static void +_surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, + BlitSequence *destinations, int *error) +{ + PyObject *src_surf, *pos_or_seq; + SDL_Surface *src, *dst = pgSurface_AsSurface(self); + if (!dst) { + *error = BLITS_ERR_DISPLAY_SURF_QUIT; + return; + } + + int x, y; + + if (PyTuple_Check(item) && PyTuple_GET_SIZE(item) == 2) { + /* (Surface, dest) */ + src_surf = PyTuple_GET_ITEM(item, 0); + pos_or_seq = PyTuple_GET_ITEM(item, 1); + } + else { + *error = FBLITS_ERR_TUPLE_REQUIRED; + return; + } + + /* Check that the source is a Surface */ + if (!pgSurface_Check(src_surf)) { + *error = BLITS_ERR_SOURCE_NOT_SURFACE; + return; + } + if (!(src = pgSurface_AsSurface(src_surf))) { + *error = BLITS_ERR_SEQUENCE_SURF; + return; + } + + if (pgRect_Check(pos_or_seq)) { + SDL_Rect *r = &pgRect_AsRect(pos_or_seq); + x = r->x; + y = r->y; + *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, + blend_flags); + return; + } + else if (pgFRect_Check(pos_or_seq)) { + SDL_FRect *r = &pgFRect_AsRect(pos_or_seq); + x = (int)r->x; + y = (int)r->y; + *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, + blend_flags); + return; + } + else if (pg_TwoIntsFromObj(pos_or_seq, &x, &y)) { + *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, + blend_flags); + return; + } + + if (!pgSequenceFast_Check(pos_or_seq)) { + *error = FBLITS_ERR_INVALID_SEQUENCE; + return; + } + + *error = _surf_fblits_cached_item_check_and_blit( + self, src, dst, pos_or_seq, blend_flags, destinations); +} + static PyObject * surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) { @@ -2382,15 +2391,14 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) int blend_flags = 0; /* Default flag is 0, opaque */ int error = 0; int is_generator = 0; - int cache = 0; BlitSequence destinations = {NULL, 
0, 0}; - if (nargs == 0 || nargs > 3) { + if (nargs == 0 || nargs > 2) { error = FBLITS_ERR_INCORRECT_ARGS_NUM; goto on_error; } /* Get the blend flags if they are passed */ - else if (nargs >= 2) { + else if (nargs == 2) { if (!PyLong_Check(args[1])) { error = FBLITS_ERR_FLAG_NOT_NUMERIC; goto on_error; @@ -2399,17 +2407,6 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) if (PyErr_Occurred()) { return NULL; } - - if (nargs == 3) { - if (!PyBool_Check(args[2])) { - error = FBLITS_ERR_CACHE_NOT_NUMERIC; - goto on_error; - } - cache = PyObject_IsTrue(args[2]); - if (PyErr_Occurred()) { - return NULL; - } - } } blit_sequence = args[0]; @@ -2420,33 +2417,21 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) PyObject **sequence_items = PySequence_Fast_ITEMS(blit_sequence); for (i = 0; i < PySequence_Fast_GET_SIZE(blit_sequence); i++) { item = sequence_items[i]; - if (cache) { - error = _surf_fblits_cached_item_check_and_blit( - self, item, blend_flags, &destinations); - } - else { - error = - _surf_fblits_item_check_and_blit(self, item, blend_flags); - } - if (error) { + + _surf_fblits_blit(self, item, blend_flags, &destinations, &error); + + if (error) goto on_error; - } } } else if (PyIter_Check(blit_sequence)) { is_generator = 1; while ((item = PyIter_Next(blit_sequence))) { - if (cache) { - error = _surf_fblits_cached_item_check_and_blit( - self, item, blend_flags, &destinations); - } - else { - error = - _surf_fblits_item_check_and_blit(self, item, blend_flags); - } - if (error) { + _surf_fblits_blit(self, item, blend_flags, &destinations, &error); + + if (error) goto on_error; - } + Py_DECREF(item); } @@ -2521,11 +2506,12 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) case FBLITS_ERR_NO_MEMORY: return RAISE(PyExc_MemoryError, "No memory available"); case FBLITS_ERR_INVALID_SEQUENCE_LENGTH: - return RAISE(PyExc_ValueError, - "Invalid sequence length for cached blit"); + return 
RAISE(PyExc_ValueError, "Invalid sequence length for blit"); case FBLITS_ERR_INVALID_DESTINATION: return RAISE(PyExc_TypeError, - "Invalid destination position for cached blit"); + "Invalid destination position for blit"); + case FBLITS_ERR_INVALID_SEQUENCE: + return RAISE(PyExc_TypeError, "Invalid sequence for multi-blit"); } return RAISE(PyExc_TypeError, "Unknown error"); } From f8f78a8d8bf35973ef46226c30023554f19e0144 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 11 May 2024 16:46:10 +0200 Subject: [PATCH 22/28] fix --- src_c/surface.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src_c/surface.c b/src_c/surface.c index 4f4a3a825b..fc470a0090 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2176,13 +2176,13 @@ _surf_fblits_item_check_and_blit(PyObject *src_surf, SDL_Surface *src, } int -_surf_fblits_cached_item_check_and_blit(pgSurfaceObject *self, +_surf_fblits_cached_item_check_and_blit(PyObject *src_surf, + pgSurfaceObject *self, SDL_Surface *src, SDL_Surface *dst, PyObject *pos_sequence, int blend_flags, BlitSequence *destinations) { - PyObject *src_surf; SDL_Surface *subsurface; int suboffsetx = 0, suboffsety = 0; SDL_Rect orig_clip, sub_clip; @@ -2378,7 +2378,7 @@ _surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, } *error = _surf_fblits_cached_item_check_and_blit( - self, src, dst, pos_or_seq, blend_flags, destinations); + src_surf, self, src, dst, pos_or_seq, blend_flags, destinations); } static PyObject * From b7a7d78762f19bb703d8e98829ab2a1a33a8f83a Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sun, 12 May 2024 13:49:29 +0200 Subject: [PATCH 23/28] rename "cache" -> "multi" --- src_c/alphablit.c | 10 +++++----- src_c/surface.c | 16 ++++++++-------- src_c/surface.h | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 
d1e0a799f8..9df2dbc0fe 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -590,8 +590,8 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, } void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations) +pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations) { Py_ssize_t i; for (i = 0; i < destinations->size; i++) { @@ -613,8 +613,8 @@ pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, } int -SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - BlitSequence *destinations) +SoftMultiBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + BlitSequence *destinations) { int okay = 1; int src_locked = 0, dst_locked = 0; @@ -645,7 +645,7 @@ SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, } /* blitcopy */ - pg_cached_blitcopy(src, dst, destinations); + pg_multi_blitcopy(src, dst, destinations); break; default: okay = 0; diff --git a/src_c/surface.c b/src_c/surface.c index fc470a0090..f0a8f96f14 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2176,12 +2176,12 @@ _surf_fblits_item_check_and_blit(PyObject *src_surf, SDL_Surface *src, } int -_surf_fblits_cached_item_check_and_blit(PyObject *src_surf, - pgSurfaceObject *self, - SDL_Surface *src, SDL_Surface *dst, - PyObject *pos_sequence, - int blend_flags, - BlitSequence *destinations) +_surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, + pgSurfaceObject *self, + SDL_Surface *src, SDL_Surface *dst, + PyObject *pos_sequence, + int blend_flags, + BlitSequence *destinations) { SDL_Surface *subsurface; int suboffsetx = 0, suboffsety = 0; @@ -2303,7 +2303,7 @@ _surf_fblits_cached_item_check_and_blit(PyObject *src_surf, pgSurface_Prep((pgSurfaceObject *)src_surf); - error = SoftCachedBlitPyGame(src, dst, blend_flags, destinations); + error = SoftMultiBlitPyGame(src, dst, blend_flags, destinations); if (subsurface) SDL_SetClipRect(subsurface, &orig_clip); @@ -2377,7 
+2377,7 @@ _surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, return; } - *error = _surf_fblits_cached_item_check_and_blit( + *error = _surf_fblits_multiblit_item_check_and_blit( src_surf, self, src, dst, pos_or_seq, blend_flags, destinations); } diff --git a/src_c/surface.h b/src_c/surface.h index 3bb32dbd65..658d7b8cf4 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -364,12 +364,12 @@ pygame_Blit(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, SDL_Rect *dstrect, int blend_flags); int -SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - BlitSequence *destinations); +SoftMultiBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + BlitSequence *destinations); void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations); +pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations); int premul_surf_color_by_alpha(SDL_Surface *src, SDL_Surface *dst); From 0325472d53e3805eb10772fd9a7eb66bc26c14c0 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 18 May 2024 11:32:48 +0200 Subject: [PATCH 24/28] Now properly supporting all rects/rectlike as positions, minor changes and renames --- src_c/alphablit.c | 7 ++++--- src_c/surface.c | 33 ++++++++++----------------------- src_c/surface.h | 4 ++-- 3 files changed, 16 insertions(+), 28 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 9df2dbc0fe..7d018a9ef7 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -594,12 +594,13 @@ pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, BlitSequence *destinations) { Py_ssize_t i; + const int src_skip = src->pitch / 4; + const int dst_skip = dst->pitch / 4; + for (i = 0; i < destinations->size; i++) { - CachedBlitDest *item = &destinations->sequence[i]; + BlitDestination *item = &destinations->sequence[i]; const int src_pitch = item->w * sizeof(Uint32); - const int src_skip = src->pitch / 
4; - const int dst_skip = dst->pitch / 4; Uint32 *srcp32 = (Uint32 *)src->pixels + item->x + item->y * src_skip; Uint32 *dstp32 = item->pixels; diff --git a/src_c/surface.c b/src_c/surface.c index f0a8f96f14..5ab7815f0b 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2205,8 +2205,8 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, /* manage destinations memory */ Py_ssize_t new_size = PySequence_Fast_GET_SIZE(pos_sequence); if (new_size > destinations->alloc_size) { - destinations->sequence = (CachedBlitDest *)realloc( - destinations->sequence, new_size * sizeof(CachedBlitDest)); + destinations->sequence = (BlitDestination *)realloc( + destinations->sequence, new_size * sizeof(BlitDestination)); if (!destinations->sequence) return FBLITS_ERR_NO_MEMORY; @@ -2222,9 +2222,6 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, return FBLITS_ERR_INVALID_SEQUENCE_LENGTH; } - if (destinations->size == 0) - return 0; - if (self->subsurface) { PyObject *owner; struct pgSubSurface_Data *subdata; @@ -2281,7 +2278,7 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, if (!SDL_IntersectRect(clip_rect, &src_dest, &clipped)) continue; /* Skip out of bounds destinations */ - CachedBlitDest *d_item = &destinations->sequence[current_size++]; + BlitDestination *d_item = &destinations->sequence[current_size++]; d_item->pixels = (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; @@ -2323,6 +2320,7 @@ _surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, { PyObject *src_surf, *pos_or_seq; SDL_Surface *src, *dst = pgSurface_AsSurface(self); + SDL_Rect temp, *argrect = NULL; if (!dst) { *error = BLITS_ERR_DISPLAY_SURF_QUIT; return; @@ -2350,23 +2348,12 @@ _surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, return; } - if (pgRect_Check(pos_or_seq)) { - SDL_Rect *r = &pgRect_AsRect(pos_or_seq); - x = r->x; - y = r->y; - *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, 
- blend_flags); - return; - } - else if (pgFRect_Check(pos_or_seq)) { - SDL_FRect *r = &pgFRect_AsRect(pos_or_seq); - x = (int)r->x; - y = (int)r->y; - *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, - blend_flags); - return; - } - else if (pg_TwoIntsFromObj(pos_or_seq, &x, &y)) { + if (pg_TwoIntsFromObj(pos_or_seq, &x, &y) || + (argrect = pgRect_FromObject(pos_or_seq, &temp))) { + if (argrect) { + x = argrect->x; + y = argrect->y; + } *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, blend_flags); return; diff --git a/src_c/surface.h b/src_c/surface.h index 658d7b8cf4..9208fbfb0b 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -340,10 +340,10 @@ typedef struct { Uint32 *pixels; int w, h, x, y; -} CachedBlitDest; +} BlitDestination; typedef struct { - CachedBlitDest *sequence; + BlitDestination *sequence; Py_ssize_t alloc_size; Py_ssize_t size; } BlitSequence; From 5acd3a5ba1e50a862dea3c7445553d25fd54abcb Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Sat, 18 May 2024 14:31:17 +0200 Subject: [PATCH 25/28] more changes to marginally improve performance + now using less memory per destination (20bytes -> 16) --- src_c/alphablit.c | 17 +++++++++-------- src_c/surface.c | 31 ++++++++++++++++--------------- src_c/surface.h | 6 +++--- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 7d018a9ef7..8d3d0ac771 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -590,23 +590,24 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, } void -pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations) +pg_multi_blitcopy(SDL_Surface *restrict src, SDL_Surface *restrict dst, + BlitSequence *restrict destinations) { Py_ssize_t i; const int src_skip = src->pitch / 4; const int dst_skip = dst->pitch / 4; + Uint32 *const src_start = (Uint32 *)src->pixels; + for (i = 0; i < 
destinations->size; i++) { BlitDestination *item = &destinations->sequence[i]; - - const int src_pitch = item->w * sizeof(Uint32); - - Uint32 *srcp32 = (Uint32 *)src->pixels + item->x + item->y * src_skip; Uint32 *dstp32 = item->pixels; + int h = item->rows; + const int copy_w = item->copy_w; + Uint32 *srcp32 = src_start + item->src_offset; - while (item->h--) { - memcpy(dstp32, srcp32, src_pitch); + while (h--) { + memcpy(dstp32, srcp32, copy_w); srcp32 += src_skip; dstp32 += dst_skip; } diff --git a/src_c/surface.c b/src_c/surface.c index 5ab7815f0b..413feb1a26 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2162,14 +2162,13 @@ surf_blits(pgSurfaceObject *self, PyObject *args, PyObject *keywds) #define FBLITS_ERR_INVALID_SEQUENCE 21 int -_surf_fblits_item_check_and_blit(PyObject *src_surf, SDL_Surface *src, +_surf_fblits_item_check_and_blit(pgSurfaceObject *src_surf, SDL_Surface *src, pgSurfaceObject *dest, int x, int y, int blend_flags) { SDL_Rect dest_rect = {x, y, src->w, src->h}; - if (pgSurface_Blit(dest, (pgSurfaceObject *)src_surf, &dest_rect, NULL, - blend_flags)) + if (pgSurface_Blit(dest, src_surf, &dest_rect, NULL, blend_flags)) return BLITS_ERR_BLIT_FAIL; return 0; @@ -2257,6 +2256,7 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, Py_ssize_t current_size = 0; SDL_Rect src_dest = {0, 0, src->w, src->h}; SDL_Rect temp, *argrect; + const SDL_Rect *clip_rect = &dst->clip_rect; for (i = 0; i < destinations->size; i++) { PyObject *item = seq_items[i]; @@ -2273,7 +2273,6 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, src_dest.x += suboffsetx; src_dest.y += suboffsety; - SDL_Rect *clip_rect = &dst->clip_rect; SDL_Rect clipped; if (!SDL_IntersectRect(clip_rect, &src_dest, &clipped)) continue; /* Skip out of bounds destinations */ @@ -2282,12 +2281,12 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, d_item->pixels = (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; - - d_item->w = 
clipped.w; - d_item->h = clipped.h; - - d_item->x = src_dest.x < clip_rect->x ? clip_rect->x - src_dest.x : 0; - d_item->y = src_dest.y < clip_rect->y ? clip_rect->y - src_dest.y : 0; + d_item->copy_w = clipped.w * 4; + d_item->rows = clipped.h; + d_item->src_offset = + (src_dest.x < clip_rect->x ? clip_rect->x - src_dest.x : 0) + + (src_dest.y < clip_rect->y ? clip_rect->y - src_dest.y : 0) * + src->pitch / 4; } if (!(destinations->size = current_size)) { @@ -2348,14 +2347,17 @@ _surf_fblits_blit(pgSurfaceObject *self, PyObject *item, int blend_flags, return; } + if (!src->w || !src->h) + return; + if (pg_TwoIntsFromObj(pos_or_seq, &x, &y) || (argrect = pgRect_FromObject(pos_or_seq, &temp))) { if (argrect) { x = argrect->x; y = argrect->y; } - *error = _surf_fblits_item_check_and_blit(src_surf, src, self, x, y, - blend_flags); + *error = _surf_fblits_item_check_and_blit( + (pgSurfaceObject *)src_surf, src, self, x, y, blend_flags); return; } @@ -2403,9 +2405,8 @@ surf_fblits(pgSurfaceObject *self, PyObject *const *args, Py_ssize_t nargs) Py_ssize_t i; PyObject **sequence_items = PySequence_Fast_ITEMS(blit_sequence); for (i = 0; i < PySequence_Fast_GET_SIZE(blit_sequence); i++) { - item = sequence_items[i]; - - _surf_fblits_blit(self, item, blend_flags, &destinations, &error); + _surf_fblits_blit(self, sequence_items[i], blend_flags, + &destinations, &error); if (error) goto on_error; diff --git a/src_c/surface.h b/src_c/surface.h index 9208fbfb0b..0ed2d0614a 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -339,7 +339,7 @@ typedef struct { Uint32 *pixels; - int w, h, x, y; + int copy_w, rows, src_offset; } BlitDestination; typedef struct { @@ -368,8 +368,8 @@ SoftMultiBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, BlitSequence *destinations); void -pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations); +pg_multi_blitcopy(SDL_Surface *restrict src, SDL_Surface *restrict dst, + BlitSequence *restrict destinations); 
int premul_surf_color_by_alpha(SDL_Surface *src, SDL_Surface *dst); From 7a5834b1c1fadf9798c2e8271826c824fcaceb9b Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Wed, 22 May 2024 15:07:13 +0200 Subject: [PATCH 26/28] forgot a rename --- src_c/alphablit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 8d3d0ac771..2e485e40d5 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -63,12 +63,12 @@ SoftBlitPyGame(SDL_Surface *src, SDL_Rect *srcrect, SDL_Surface *dst, SDL_Rect *dstrect, int blend_flags); int -SoftCachedBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, - BlitSequence *destinations); +SoftMultiBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, + BlitSequence *destinations); void -pg_cached_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations); +pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, + BlitSequence *destinations); extern int SDL_RLESurface(SDL_Surface *surface); From d50fd98f6acfa5df71783889149c34242643492b Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Wed, 22 May 2024 16:02:28 +0200 Subject: [PATCH 27/28] removed unused declaration --- src_c/alphablit.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index 2e485e40d5..a1fc930487 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -66,10 +66,6 @@ int SoftMultiBlitPyGame(SDL_Surface *src, SDL_Surface *dst, int blend_flags, BlitSequence *destinations); -void -pg_multi_blitcopy(SDL_Surface *src, SDL_Surface *dst, - BlitSequence *destinations); - extern int SDL_RLESurface(SDL_Surface *surface); extern void From 662fa0ee5c21f078322e095fc21481333bee5f77 Mon Sep 17 00:00:00 2001 From: itzpr3d4t0r <103119829+itzpr3d4t0r@users.noreply.github.com> Date: Wed, 22 May 2024 21:12:46 +0200 Subject: [PATCH 28/28] fix and rename --- 
src_c/alphablit.c | 2 +- src_c/surface.c | 2 +- src_c/surface.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src_c/alphablit.c b/src_c/alphablit.c index a1fc930487..e0b75da455 100644 --- a/src_c/alphablit.c +++ b/src_c/alphablit.c @@ -599,7 +599,7 @@ pg_multi_blitcopy(SDL_Surface *restrict src, SDL_Surface *restrict dst, BlitDestination *item = &destinations->sequence[i]; Uint32 *dstp32 = item->pixels; int h = item->rows; - const int copy_w = item->copy_w; + const int copy_w = item->width * 4; Uint32 *srcp32 = src_start + item->src_offset; while (h--) { diff --git a/src_c/surface.c b/src_c/surface.c index 413feb1a26..e2e58d8306 100644 --- a/src_c/surface.c +++ b/src_c/surface.c @@ -2281,7 +2281,7 @@ _surf_fblits_multiblit_item_check_and_blit(PyObject *src_surf, d_item->pixels = (Uint32 *)dst->pixels + clipped.y * dst->pitch / 4 + clipped.x; - d_item->copy_w = clipped.w * 4; + d_item->width = clipped.w; d_item->rows = clipped.h; d_item->src_offset = (src_dest.x < clip_rect->x ? clip_rect->x - src_dest.x : 0) + diff --git a/src_c/surface.h b/src_c/surface.h index 0ed2d0614a..c02b60c31f 100644 --- a/src_c/surface.h +++ b/src_c/surface.h @@ -339,7 +339,7 @@ typedef struct { Uint32 *pixels; - int copy_w, rows, src_offset; + int width, rows, src_offset; } BlitDestination; typedef struct {