diff --git a/overviewer_core/src/block_class.c b/overviewer_core/src/block_class.c index 7fc7187..d9e9b0b 100644 --- a/overviewer_core/src/block_class.c +++ b/overviewer_core/src/block_class.c @@ -31,6 +31,28 @@ bool block_class_is_subset( size_t block_class_len) { size_t i = 0; +#if defined(__AVX512F__) && defined(__AVX512BW__) + for (; i / 32 < block_class_len / 32; i += 32) { + const __m512i block_class_vec = _mm512_loadu_si512( + (__m512i*)&block_class[i]); + const __m512i block_vec = _mm512_set1_epi16(block); + const __mmask32 block_cmp = _mm512_cmpeq_epi16_mask(block_vec, block_class_vec); + if (block_cmp) { + return true; + } + } +#endif +#if defined(__AVX2__) + for (; i / 16 < block_class_len / 16; i += 16) { + const __m256i block_class_vec = _mm256_loadu_si256( + (__m256i*)&block_class[i]); + const __m256i block_vec = _mm256_set1_epi16(block); + const __m256i block_cmp = _mm256_cmpeq_epi16(block_vec, block_class_vec); + if (_mm256_movemask_epi8(block_cmp)) { + return true; + } + } +#endif #ifdef __SSE2__ for (; i / 8 < block_class_len / 8; i += 8) { const __m128i block_class_vec = _mm_loadu_si128( diff --git a/overviewer_core/src/overviewer.h b/overviewer_core/src/overviewer.h index 0296647..a8b9c88 100644 --- a/overviewer_core/src/overviewer.h +++ b/overviewer_core/src/overviewer.h @@ -31,7 +31,7 @@ // increment this value if you've made a change to the c extension // and want to force users to rebuild -#define OVERVIEWER_EXTENSION_VERSION 109 +#define OVERVIEWER_EXTENSION_VERSION 110 #include #include