-1
/*
 * Incomplete SSE attempt (the code the question is asking about).
 *
 * Walks b1.c and b2.c four floats at a time and computes the element-wise
 * differences b1.c[j..j+3] - b2.c[j..j+3] into v_b3.
 *
 * Known gaps, left as posted:
 *   - v_b3 is overwritten on every iteration and never accumulated, so no
 *     sum is ever formed.
 *   - There is no return statement although the function is declared to
 *     return float.
 *   - dimention/4 is an integer division, so when dimention is not a
 *     multiple of 4 the trailing 1-3 elements are never visited.
 *
 * NOTE(review): _mm_load_ps requires 16-byte aligned addresses; P is not
 * shown here, so the alignment of b1.c / b2.c needs to be confirmed.
 */
float myfunction ( P b1, P b2, int dimention )
{       
    __m128 v_b1,v_b2,v_b3;
    int j=0;                            /* element offset of the current group of 4 */

    for (int i=0; i<dimention/4; i++) {
        v_b1=_mm_load_ps(b1.c +j);      /* 4 floats from b1.c starting at j */
        v_b2=_mm_load_ps(b2.c +j);      /* 4 floats from b2.c starting at j */
        v_b3=_mm_sub_ps(v_b1,v_b2);     /* 4 differences; result is discarded */
        j+=4;   
    }
}   

我真正想做的是:

/*
 * Scalar reference for the intended computation: add every element-wise
 * difference b1.c[i] - b2.c[i] into result.
 * NOTE(review): result is not declared in this fragment; presumably a float
 * initialised to 0 by the surrounding code -- confirm.
 */
for (int i=0; i<dimention; i++ ) {
    result += b1.c[i] - b2.c[i];
}

我想返回 result。你能帮我吗?

4

1 回答 1

2

我假设您想对所有差值求和,然后将其作为函数结果返回:

#include <pmmintrin.h>                   // SSE3

/*
 * Returns the sum of the element-wise differences b1.c[i] - b2.c[i]
 * for i in [0, dimention).
 *
 * Full groups of four floats are processed with SSE; the remaining
 * 0-3 elements (when dimention is not a multiple of 4) are added with
 * a scalar loop so that nothing past index dimention-1 is ever read.
 * A dimention <= 0 yields 0.
 *
 * NOTE(review): _mm_load_ps requires 16-byte aligned addresses; P is not
 * shown here, so the alignment of b1.c / b2.c needs to be confirmed
 * (switch to _mm_loadu_ps if it cannot be guaranteed).
 *
 * Requires SSE3 (<pmmintrin.h>) for _mm_hadd_ps.
 */
float myfunction ( P b1, P b2, int dimention )
{
    __m128 v_b1, v_b2, v_b3, v_b4;
    float f;
    int j;
    /* Largest multiple of 4 that does not exceed dimention: the vector
       loop must stop here, otherwise its last load runs past the data. */
    const int vec_end = dimention - dimention % 4;

    v_b4 = _mm_setzero_ps();             // initialise sum of differences to zero
    for (j = 0; j < vec_end; j += 4)
    {
        v_b1 = _mm_load_ps(b1.c + j);    // load 4 floats from b1[j]
        v_b2 = _mm_load_ps(b2.c + j);    // load 4 floats from b2[j]
        v_b3 = _mm_sub_ps(v_b1, v_b2);   // calc 4 differences
        v_b4 = _mm_add_ps(v_b4, v_b3);   // accumulate 4 differences
    }
    v_b4 = _mm_hadd_ps(v_b4, v_b4);      // sum horizontally
    v_b4 = _mm_hadd_ps(v_b4, v_b4);      // (NB: need to do this twice to sum all 4 elements)
    _mm_store_ss(&f, v_b4);              // extract sum of the vectorised part

    for (; j < dimention; j++)           // scalar tail: leftover 0-3 elements
    {
        f += b1.c[j] - b2.c[j];
    }
    return f;                            // return sum
}

如果这不是你想要做的,那么请用更多细节更新你的问题,我会相应地更新代码。

于 2013-04-29T10:21:44.970 回答