block_class: Complete x86-SIMD
Accelerates `block_class_is_subset` using every available x86 SIMD extension, up to AVX512.
This commit is contained in:
parent
97ef871e36
commit
cbd9496143
|
@ -31,6 +31,28 @@ bool block_class_is_subset(
|
|||
size_t block_class_len) {
|
||||
size_t i = 0;
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512BW__)
|
||||
for (; i / 32 < block_class_len / 32; i += 32) {
|
||||
const __m512i block_class_vec = _mm512_loadu_si512(
|
||||
(__m512i*)&block_class[i]);
|
||||
const __m512i block_vec = _mm512_set1_epi16(block);
|
||||
const __mmask32 block_cmp = _mm512_cmpeq_epi16_mask(block_vec, block_class_vec);
|
||||
if (block_cmp) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(__AVX2__)
|
||||
for (; i / 16 < block_class_len / 16; i += 16) {
|
||||
const __m256i block_class_vec = _mm256_loadu_si256(
|
||||
(__m256i*)&block_class[i]);
|
||||
const __m256i block_vec = _mm256_set1_epi16(block);
|
||||
const __m256i block_cmp = _mm256_cmpeq_epi16(block_vec, block_class_vec);
|
||||
if (_mm256_movemask_epi8(block_cmp)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef __SSE2__
|
||||
for (; i / 8 < block_class_len / 8; i += 8) {
|
||||
const __m128i block_class_vec = _mm_loadu_si128(
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
// increment this value if you've made a change to the c extension
|
||||
// and want to force users to rebuild
|
||||
#define OVERVIEWER_EXTENSION_VERSION 109
|
||||
#define OVERVIEWER_EXTENSION_VERSION 110
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
|
Loading…
Reference in New Issue