0

我需要帮助,把下面这个函数改写成 SIMD 版本。

我刚开始学习 SIMD 和 SSE 以及它们带来的种种好处,因此非常感谢您的帮助。谢谢!

/*
 * Sum of absolute differences (SAD) between two 8x8 blocks of bytes.
 *
 * block1, block2: pointers to the top-left byte of each block.
 * stride:         distance, in bytes, between the starts of consecutive rows
 *                 (the same stride is used for both blocks).
 * result:         receives the sum of |block2[i] - block1[i]| over all 64
 *                 positions; it is reset to 0 before accumulation starts.
 */
void block_8x8(uint8_t *block1, 
               uint8_t *block2, 
               int stride, 
               int *result)
{
    *result = 0;

    for (int row = 0; row < 8; ++row) {
        /* Offset of the first byte of this row inside either block. */
        const int row_start = row * stride;

        for (int col = 0; col < 8; ++col) {
            const int idx = row_start + col;

            /* uint8_t operands promote to int, so the difference may be negative. */
            *result += abs(block2[idx] - block1[idx]);
        }
    }
}
4

1 回答 1

2

这是 SSE (Intel)/MMX+ (AMD) 版本。

/*
 * Sum of absolute differences (SAD) between two 8x8 blocks of bytes,
 * computed one 8-byte row at a time with the _mm_sad_pu8 intrinsic.
 *
 * block1, block2: pointers to the top-left byte of each block.
 * stride:         distance, in bytes, between the starts of consecutive rows
 *                 (the same stride is used for both blocks).
 * result:         receives the low 32 bits of the accumulated row SADs.
 *
 * NOTE(review): each row is read by dereferencing the byte pointer as a
 * __m64; whether the caller guarantees 8-byte alignment is not visible here.
 */
void block_8x8(uint8_t *block1, 
               uint8_t *block2, 
               int stride, 
               int *result)
{
    /* Seed the accumulator with the SAD of the first row. */
    __m64 total = _mm_sad_pu8(*((const __m64 *)block1),
                              *((const __m64 *)block2));

    /* Rows 1..7: step both pointers down one row, then add that row's SAD. */
    for (int row = 1; row < 8; ++row) {
        block1 += stride;
        block2 += stride;

        const __m64 row_sad = _mm_sad_pu8(*((const __m64 *)block1),
                                          *((const __m64 *)block2));
        total = _mm_add_pi32(total, row_sad);
    }

    *result = _mm_cvtsi64_si32(total);

    /* Leave MMX state so that later x87 floating-point code works correctly. */
    _mm_empty();
}
于 2012-09-24T17:20:28.073 回答