0

我正在尝试从 _m128i 位掩码返回_mm_cmpistrm,它指示 128 位源中的哪些字节包含空格。但是,我遇到了一些问题,因为当我将第一个和第三个字节设置为空格时,我得到的结果是 {255,255,0,0,0,0,0,0....} 而不是 {255,0,255 ,0,0,0,...}....

有人可以帮助/启发吗?

#include <stdio.h>
#include <nmmintrin.h>
#include <iostream>

using namespace std;

int main ()
{
    __m128i a, b;

    const int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY  | _SIDD_LEAST_SIGNIFICANT;

    a.m128i_u8[15] = 't';   
    a.m128i_u8[14] = 'e';
    a.m128i_u8[13] = 's';
    a.m128i_u8[12] = 'o';
    a.m128i_u8[11] = 'd';
    a.m128i_u8[10] = 'i';
    a.m128i_u8[9] = 'f';
    a.m128i_u8[8] = 'g';
    a.m128i_u8[7] = 't';
    a.m128i_u8[6] = 'd';
    a.m128i_u8[5] = 'b';
    a.m128i_u8[4] = 'n';
    a.m128i_u8[3] = 'd';
    a.m128i_u8[2] = ' ';
    a.m128i_u8[1] = 'i';
    a.m128i_u8[0] = ' ';

    b.m128i_u8[15] = ' ';
    b.m128i_u8[14] = ' ';
    b.m128i_u8[13] = ' ';
    b.m128i_u8[12] = ' ';
    b.m128i_u8[11] = ' ';
    b.m128i_u8[10] = ' ';
    b.m128i_u8[9] = ' ';
    b.m128i_u8[8] = ' ';
    b.m128i_u8[7] = ' ';
    b.m128i_u8[6] = ' ';
    b.m128i_u8[5] = ' ';
    b.m128i_u8[4] = ' ';
    b.m128i_u8[3] = ' ';
    b.m128i_u8[2] = ' ';
    b.m128i_u8[1] = ' ';
    b.m128i_u8[0] = ' ';

    __m128i returnValue = _mm_cmpistrm(a, b, mode);

    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[0])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[1])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[2])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[3])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[4])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[5])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[6])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[7])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[8])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[9])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[10])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[11])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[12])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[13])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[14])) << endl;
    std::cout << int(static_cast<unsigned char>(returnValue.m128i_u8[15])) << endl;

    return 0;
}
4

1 回答 1

1

两个可能的问题:

  • 我相信结果是结果向量的低位两个字节中的压缩 16 位字段,因此在这种情况下,您的结果是 16 个 TRUE 标志。

  • 您正在使用_SIDD_CMP_EQUAL_ANY,我认为您可能需要使用_SIDD_CMP_EQUAL_EACH- 如果您进行此更改,我认为您的结果将是 5, 0, 0, 0, ... 表示元素 0 和 2 匹配。

注意:虽然我在 SSE 和 AVX 方面做了很多工作,但我从未使用过这些字符串指令,所以我只是将上述观点建立在快速阅读英特尔文档的基础上。

于 2013-10-19T21:28:30.383 回答