0

简而言之,我正在尝试使用非托管 C++ 乘法函数从 C# 进行 SIMD 数学运算并弄乱指针。

C++ 函数:

extern "C" __declspec(dllexport) void SIMDVector4Mult( __m128* a, __m128* b )
{
    __m128 c = _mm_mul_ps( *a, *b );
    memcpy( &(a->m128_f32), &(c.m128_f32), sizeof( a->m128_f32 ) );
}

C# SIMDVector4 结构:

[StructLayout( LayoutKind.Sequential, Size = 16 )]
struct SIMDVector4
{
    float x;
    float y;
    float z;
    float w;


    public SIMDVector4( float x, float y, float z, float w )
    {
        this.x = x;
        this.y = y;
        this.z = z;
        this.w = w;
    }


    public override string ToString( )
    {
        return "X: " + x + ", Y: " + y + ", Z: " + z + ", W: " + w;
    }


    public static unsafe SIMDVector4 operator *( SIMDVector4 a, SIMDVector4 b )
    {
        SIMDVector4Mult( ref a, ref b );
        return a;
    }


    [DllImport("SIMDMathLibrary.dll", CallingConvention = CallingConvention.Cdecl )]
    extern private static unsafe void SIMDVector4Mult( ref SIMDVector4 a, ref SIMDVector4 b );
}

目的是将 SIMD 数学添加到我的 C# 代码库中,以及使用 P/Invoke 和结合 C# 和 C++ 进行一些练习。

如果我将它们作为 float* 变量接收,将它们 memcpy 到临时 __m128 变量,进行乘法运算,然后将它们 memcpy 回来,它就可以工作。但这比在 C# 中将每个变量单独相乘要慢。我使用优化的 Vector4 类对此进行了测试;它的性能大约是复制-乘法-复制方法的十倍。

我已经尝试了几乎十几种不同的方法,包括当前设置,它遇到了标题中提到的受保护内存读/写访问错误:“尝试读取或写入受保护内存。这通常表明其他内存已损坏。”

如何修复此错误?我应该修复这个错误吗?我应该尝试另一种完全实现意图的方法吗?

谢谢。

4

1 回答 1

2

这是一个应该可以工作的更新版本。现在该结构包装了一个对齐的 ptr,所以我们需要额外的 4-8 个字节。不确定由于间接导致的性能命中是什么。VirtualAlloc 一次对齐一堆,所以我们一次分配一个大块。

下面的例子非常草率。如果可能,您可能还想考虑使用 CLI/C++。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices;
using System.Runtime.ConstrainedExecution;


namespace ConsoleApplication1
{
    class Program
    {
        unsafe static void Main(string[] args)
        {
            using (AlignedBlock block = new AlignedBlock(SIMDVector4.Sizeof, AlignedBlock.AllocationGranularity / SIMDVector4.Sizeof))
            {
                for (int index = 0; index < block.Count; index++)
                {
                    SIMDVector4.Create(block[index], 1, 2, 3, 4);
                }

                SIMDVector4 a = SIMDVector4.Create(block[0], 1, 2, 3, 4);
                SIMDVector4 b = SIMDVector4.Create(block[1], 1, 2, 3, 4);
                SIMDVector4 c = SIMDVector4.Create(block[2], 5, 6, 7, 8);
                SIMDVector4 d = a * b * c;
                Console.WriteLine(d.ToString());
            }

            Console.ReadLine();
        }
    }

    public sealed unsafe class AlignedBlock : CriticalFinalizerObject, IDisposable
    {
        private void* ptr;

        static AlignedBlock()
        {
            SYSTEM_INFO info = new SYSTEM_INFO();
            NativeMethods.GetSystemInfo(ref info);
            PageSize = (int)info.dwPageSize;
            AllocationGranularity = (int)info.dwAllocationGranularity;
        }

        public AlignedBlock(int itemSize, int count)
        {
            this.ItemSize = itemSize; 
            this.Count = count;
            ptr = NativeMethods.VirtualAlloc(IntPtr.Zero, new UIntPtr((uint)this.ByteSize), AllocationType.COMMIT, MemoryProtection.READWRITE).ToPointer();
        }


        ~AlignedBlock()
        {
            this.Dispose();
        }

        public static int PageSize { get; private set; }
        public static int AllocationGranularity { get; private set; }
        public int ItemSize { get; private set; }
        public int Count { get; private set; }
        public int ByteSize
        {
            get
            {
                return this.Count * this.ItemSize;
            }
        }

        public void* this[int index]
        {
            get
            {
                int offset = this.ItemSize * index;
                return ((byte*) this.ptr) + offset;
            }
        }

        public void Dispose()
        {
            bool result = NativeMethods.VirtualFree(new IntPtr(this.ptr), new UIntPtr(0), 0x8000);
            this.ptr = null;
        }
    }

    [StructLayout(LayoutKind.Sequential)]
    unsafe struct SIMDVector4
    {
        public const int Sizeof = 16;
        private float* ptr;

        public static SIMDVector4 Create(void* ptr, float x, float y, float z, float w)
        {
            float* value = (float*)ptr;

            SIMDVector4 vector = new SIMDVector4(value);
            *value = x;
            value++;
            *value = y;
            value++;
            *value = z;
            value++;
            *value = w;
            value++;
            return vector;
        }

        private SIMDVector4(float* ptr)
        {
            this.ptr = ptr;
        }

        public bool IsEmpty
        {
            get
            {
                return this.ptr == null;
            }
        }
        public float x
        {
            get
            {
                if (this.IsEmpty)
                {
                    return 0f;
                }

                return *(ptr);
            }

            set
            {
                if (!this.IsEmpty)
                {
                    *(this.ptr) = value;
                }
            }

        }
        public float y
        {
            get
            {
                if (this.IsEmpty)
                {
                    return 0f;
                }

                return *(ptr + 1);
            }

            set
            {
                if (!this.IsEmpty)
                {
                    *(this.ptr + 1) = value;
                }
            }

        }
        public float z
        {
            get
            {
                if (this.IsEmpty)
                {
                    return 0f;
                }

                return *(ptr + 2);
            }

            set
            {
                if (!this.IsEmpty)
                {
                    *(this.ptr + 2) = value;
                }
            }

        }
        public float w
        {
            get
            {
                if (this.IsEmpty)
                {
                    return 0f;
                }

                return *(ptr + 3);
            }

            set
            {
                if (!this.IsEmpty)
                {
                    *(this.ptr + 3) = value;
                }
            }
        }

        public override string ToString()
        {
            return "X: " + x + ", Y: " + y + ", Z: " + z + ", W: " + w;
        }


        public static SIMDVector4 operator *(SIMDVector4 a, SIMDVector4 b)
        {

            SIMDVector4Mult(a.ptr, b.ptr);
            return new SIMDVector4(a.ptr);
        }

        [DllImport(@"C:\Users\xxx\Documents\Visual Studio 2010\Projects\TestDll\Debug\TestDll.dll", CallingConvention = CallingConvention.Cdecl)]
        extern private static unsafe void SIMDVector4Mult(void* a, void* b);
    }

    internal static class NativeMethods
    {

        [DllImport("kernel32.dll", SetLastError = true)]
        public static extern bool VirtualFree(IntPtr lpAddress, UIntPtr dwSize,
               uint dwFreeType);

        [DllImport("kernel32.dll", SetLastError = true)]
        public static extern IntPtr VirtualAlloc(IntPtr lpAddress, UIntPtr dwSize,
               AllocationType flAllocationType, MemoryProtection flProtect);

        [DllImport("kernel32.dll")]
        public static extern void GetSystemInfo([MarshalAs(UnmanagedType.Struct)] ref SYSTEM_INFO lpSystemInfo);

    }


    [StructLayout(LayoutKind.Sequential)]
    public struct SYSTEM_INFO
    {
        internal _PROCESSOR_INFO_UNION uProcessorInfo;
        public uint dwPageSize;
        public IntPtr lpMinimumApplicationAddress;
        public IntPtr lpMaximumApplicationAddress;
        public IntPtr dwActiveProcessorMask;
        public uint dwNumberOfProcessors;
        public uint dwProcessorType;
        public uint dwAllocationGranularity;
        public ushort dwProcessorLevel;
        public ushort dwProcessorRevision;
    }

    [StructLayout(LayoutKind.Explicit)]
    public struct _PROCESSOR_INFO_UNION
    {
        [FieldOffset(0)]
        internal uint dwOemId;
        [FieldOffset(0)]
        internal ushort wProcessorArchitecture;
        [FieldOffset(2)]
        internal ushort wReserved;
    }

    [Flags()]
    public enum AllocationType : uint
    {
        COMMIT = 0x1000,
        RESERVE = 0x2000,
        RESET = 0x80000,
        LARGE_PAGES = 0x20000000,
        PHYSICAL = 0x400000,
        TOP_DOWN = 0x100000,
        WRITE_WATCH = 0x200000
    }

    [Flags()]
    public enum MemoryProtection : uint
    {
        EXECUTE = 0x10,
        EXECUTE_READ = 0x20,
        EXECUTE_READWRITE = 0x40,
        EXECUTE_WRITECOPY = 0x80,
        NOACCESS = 0x01,
        READONLY = 0x02,
        READWRITE = 0x04,
        WRITECOPY = 0x08,
        GUARD_Modifierflag = 0x100,
        NOCACHE_Modifierflag = 0x200,
        WRITECOMBINE_Modifierflag = 0x400
    }
}   
于 2012-10-26T01:36:24.517 回答