Skip to content

Commit a5dabb2

Browse files
committed
vc_copylineV210toRGB: improvements
- use the generic BT.709 macros
- compute the last block even if width % 6 != 0
- precompute the removal of the shifted zero level from the chroma samples

The computation should now actually be faster, since we still compute in integers as before, but without converting from/to doubles.
1 parent f35b046 commit a5dabb2

File tree

1 file changed

+29
-16
lines changed

1 file changed

+29
-16
lines changed

src/pixfmt_conv.c

Lines changed: 29 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2570,36 +2570,49 @@ static void vc_copylineV210toY416(unsigned char * __restrict dst, const unsigned
25702570
static void vc_copylineV210toRGB(unsigned char * __restrict dst, const unsigned char * __restrict src, int dst_len, int rshift,
25712571
int gshift, int bshift)
25722572
{
2573+
enum {
2574+
IDEPTH = 8,
2575+
Y_SHIFT = 1 << (IDEPTH - 4),
2576+
C_SHIFT = 1 << (IDEPTH - 1),
2577+
ODEPTH = 8,
2578+
PIX_COUNT = 6,
2579+
RGB_BPP = 3,
2580+
OUT_BL_SZ = PIX_COUNT * RGB_BPP,
2581+
};
25732582
UNUSED(rshift), UNUSED(gshift), UNUSED(bshift);
25742583
#define WRITE_YUV_AS_RGB(y, u, v) \
2575-
val = 1.164 * (y - 16) + 1.793 * (v - 128);\
2576-
*(dst++) = CLAMP(val, 0, 255);\
2577-
val = 1.164 * (y - 16) - 0.534 * (v - 128) - 0.213 * (u - 128);\
2578-
*(dst++) = CLAMP(val, 0, 255);\
2579-
val = 1.164 * (y - 16) + 2.115 * (u - 128);\
2580-
*(dst++) = CLAMP(val, 0, 255);
2581-
2584+
(y) = Y_SCALE * ((y) - Y_SHIFT); \
2585+
val = (YCBCR_TO_R_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
2586+
*(dst++) = CLAMP_FULL(val, ODEPTH); \
2587+
val = (YCBCR_TO_G_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
2588+
*(dst++) = CLAMP_FULL(val, ODEPTH); \
2589+
val = (YCBCR_TO_B_709_SCALED((y), (u), (v)) >> (COMP_BASE)); \
2590+
*(dst++) = CLAMP_FULL(val, ODEPTH);
2591+
2592+
// read 8 bits from v210 directly
25822593
#define DECLARE_LOAD_V210_COMPONENTS(a, b, c) \
2583-
unsigned char a = src[1] << 6 | src[0] >> 2;\
2584-
unsigned char b = src[2] << 4 | src[1] >> 4;\
2585-
unsigned char c = src[3] << 2 | src[2] >> 6;\
2586-
2594+
comp_type_t a = (src[1] & 0x3) << 6 | src[0] >> 2;\
2595+
comp_type_t b = (src[2] & 0xF) << 4 | src[1] >> 4;\
2596+
comp_type_t c = (src[3] & 0x3F) << 2 | src[2] >> 6;\
25872597

2588-
OPTIMIZED_FOR (int x = 0; x + 6 * 3 <= dst_len; x += 6 * 3){
2598+
OPTIMIZED_FOR (int x = 0; x < dst_len; x += OUT_BL_SZ){
25892599
DECLARE_LOAD_V210_COMPONENTS(u01, y0, v01);
25902600
src += 4;
2591-
25922601
DECLARE_LOAD_V210_COMPONENTS(y1, u23, y2);
25932602
src += 4;
2594-
25952603
DECLARE_LOAD_V210_COMPONENTS(v23, y3, u45);
25962604
src += 4;
2597-
25982605
DECLARE_LOAD_V210_COMPONENTS(y4, v45, y5);
25992606
src += 4;
26002607

2601-
int val;
2608+
comp_type_t val = 0;
26022609

2610+
u01 -= C_SHIFT;
2611+
v01 -= C_SHIFT;
2612+
u23 -= C_SHIFT;
2613+
v23 -= C_SHIFT;
2614+
u45 -= C_SHIFT;
2615+
v45 -= C_SHIFT;
26032616
WRITE_YUV_AS_RGB(y0, u01, v01);
26042617
WRITE_YUV_AS_RGB(y1, u01, v01);
26052618
WRITE_YUV_AS_RGB(y2, u23, v23);

0 commit comments

Comments
 (0)