@@ -86,31 +86,22 @@ _pg_has_avx2()
86
86
87
87
/* Setup for RUN_16BIT_SHUFFLE_OUT.
 *
 * Expands to declarations, so it must be invoked where declarations are
 * allowed and where an __m256i named mm256_color is already in scope.
 * It declares shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB and
 * mm256_zero, and pre-widens the color once: mm256_colorA is the
 * unpacklo_epi8 of mm256_color with an all-zero register and mm256_colorB
 * is the matching unpackhi_epi8, i.e. the two halves that
 * RUN_16BIT_SHUFFLE_OUT later loads back into mm256_color.
 *
 * The removed lines built the same two halves with _mm256_shuffle_epi8 and
 * the explicit masks shuff_out_A (source bytes 0-7 and 16-23) and
 * shuff_out_B (source bytes 8-15 and 24-31), each paired with a 0x80 mask
 * entry. */
88
88
#define SETUP_SHUFFLE \
89
- __m256i shuff_out_A = \
90
- _mm256_set_epi8(0x80, 23, 0x80, 22, 0x80, 21, 0x80, 20, 0x80, 19, \
91
- 0x80, 18, 0x80, 17, 0x80, 16, 0x80, 7, 0x80, 6, 0x80, \
92
- 5, 0x80, 4, 0x80, 3, 0x80, 2, 0x80, 1, 0x80, 0); \
93
- \
94
- __m256i shuff_out_B = _mm256_set_epi8( \
95
- 0x80, 31, 0x80, 30, 0x80, 29, 0x80, 28, 0x80, 27, 0x80, 26, 0x80, 25, \
96
- 0x80, 24, 0x80, 15, 0x80, 14, 0x80, 13, 0x80, 12, 0x80, 11, 0x80, 10, \
97
- 0x80, 9, 0x80, 8); \
98
- \
99
- __m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB; \
100
- mm256_colorA = _mm256_shuffle_epi8(mm256_color, shuff_out_A); \
101
- mm256_colorB = _mm256_shuffle_epi8(mm256_color, shuff_out_B);
89
+ __m256i shuff_dst, _shuff16_temp, mm256_colorA, mm256_colorB, mm256_zero; \
90
+ mm256_zero = _mm256_setzero_si256(); \
91
+ mm256_colorA = _mm256_unpacklo_epi8(mm256_color, mm256_zero); \
92
+ mm256_colorB = _mm256_unpackhi_epi8(mm256_color, mm256_zero);
102
93
103
94
#define RUN_16BIT_SHUFFLE_OUT (FILL_CODE ) \
104
95
/* ==== shuffle pixels out into two registers each, src */ \
105
96
/* and dst set up for 16 bit math, like 0A0R0G0B ==== */ \
106
- shuff_dst = _mm256_shuffle_epi8 (mm256_dst, shuff_out_A ); \
97
+ shuff_dst = _mm256_unpacklo_epi8 (mm256_dst, mm256_zero ); \
107
98
mm256_color = mm256_colorA; \
108
99
\
109
100
{FILL_CODE} \
110
101
\
111
102
_shuff16_temp = shuff_dst; \
112
103
\
113
- shuff_dst = _mm256_shuffle_epi8 (mm256_dst, shuff_out_B ); \
104
+ shuff_dst = _mm256_unpackhi_epi8 (mm256_dst, mm256_zero ); \
114
105
mm256_color = mm256_colorB; \
115
106
\
116
107
{FILL_CODE} \
0 commit comments