8

我有以下C结构

/* On-disk record: a fixed 96-byte character array followed by a
 * 64-bit offset and an unsigned count. */
struct MyStruct {
    char chArray[96];   /* fixed-size character array */
    __int64 offset;     /* 64-bit integer (compiler-specific type name) */
    unsigned count;     /* unsigned int */
};

我现在有一堆用 C 语言创建的文件,其中包含数千个这样的结构。我需要用 C# 读取它们,而且速度很重要。

我在 C# 中完成了以下操作

/// <summary>
/// Managed counterpart of the C <c>MyStruct</c> record, filled in by
/// <c>Marshal.PtrToStructure</c> from a raw byte buffer.
/// </summary>
[StructLayout(LayoutKind.Sequential, Size = 108, CharSet = CharSet.Ansi)]
public struct PreIndexStruct
{
    /// <summary>Text of the 96-byte inline character array (<c>chArray</c>).</summary>
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 96)]
    public string Key;

    /// <summary>64-bit value that follows the key (<c>offset</c>).</summary>
    public long Offset;

    /// <summary>32-bit value that follows the offset (<c>count</c>).</summary>
    public int Count;
}

然后我使用以下代码从文件中读取数据

// Reads every complete structSize-byte record from pathToFile and appends the
// decoded PreIndexStruct to `structures`.
using (BinaryReader br = new BinaryReader(
       new FileStream(pathToFile, FileMode.Open, FileAccess.Read, 
                      FileShare.Read, bufferSize))) 
{
    long length = br.BaseStream.Length;

    // One reusable buffer, pinned once so its address stays valid for every
    // PtrToStructure call in the loop.
    byte[] buff = new byte[structSize];
    GCHandle buffHandle = GCHandle.Alloc(buff, GCHandleType.Pinned);
    try
    {
        IntPtr buffPtr = buffHandle.AddrOfPinnedObject();

        // Only whole records are decoded; a trailing fragment shorter than
        // structSize is ignored instead of being mixed with stale buffer bytes.
        for (long position = 0; position + structSize <= length; position += structSize) {
            // Read may return fewer bytes than requested, so keep reading
            // until the whole record is in the buffer.
            int filled = 0;
            while (filled < structSize) {
                int got = br.Read(buff, filled, structSize - filled);
                if (got == 0) {
                    throw new EndOfStreamException("Unexpected end of file inside a record.");
                }
                filled += got;
            }

            PreIndexStruct pis = (PreIndexStruct)Marshal.PtrToStructure(
                buffPtr, typeof(PreIndexStruct));
            structures.Add(pis);
        }
    }
    finally
    {
        // Always release the pinned handle, even if reading or marshalling throws.
        buffHandle.Free();
    }
}

这非常有效,我可以从文件中很好地检索数据。

我读到过,如果使用 C++/CLI 或 C# 不安全代码来代替 GCHandle.Alloc/Marshal.PtrToStructure,可以加快速度。我找到了一些示例,但它们只涉及不含固定大小数组的结构。

我的问题是,对于我的特殊情况,是否可以用 C++/CLI 或 C# 不安全代码实现更快的读取方法?

编辑

其他性能信息(我使用过 ANTS Performance Profiler 7.4):

对 Marshal.PtrToStructure 的调用占用了我 66% 的 CPU 时间。

关于 I/O,105 毫秒中只有 6 毫秒用于读取文件。

4

3 回答 3

4

在这种情况下,您不需要显式使用 P/Invoke,因为您不必在托管代码和本机代码之间来回传递结构。所以你可以这样做。它将避免这种无用的 GC 句柄分配,并且只分配需要的东西。

/// <summary>
/// Plain managed record with no marshalling attributes; each field is
/// assigned individually by the reading code.
/// </summary>
public struct PreIndexStruct
{
    /// <summary>Record key.</summary>
    public string Key;

    /// <summary>64-bit offset value.</summary>
    public long Offset;

    /// <summary>32-bit count value.</summary>
    public int Count;
}

// Sketch only: the loop condition and the setup of `reader` are elided ("...").
// NOTE(review): `reader` is presumably a BinaryReader over the data file - confirm.
while (...) {
    ...
    // Build each record field by field instead of marshalling a pinned buffer.
    PreIndexStruct pis = new PreIndexStruct();
    // Decode the 96 key bytes with Encoding.Default.
    // NOTE(review): the decoded string presumably keeps any trailing '\0' padding
    // from the fixed-size C array - confirm and trim if needed.
    pis.Key = Encoding.Default.GetString(reader.ReadBytes(96));
    pis.Offset = reader.ReadInt64();
    pis.Count = reader.ReadInt32();
    structures.Add(pis);
}

我不确定你能比这快得多。

于 2013-01-30T14:36:23.480 回答
1

您想使用非托管代码的思路大概是对的,我会这样做:

  1. 创建一个 C++/CLI 项目并将现有的 c# 代码移植到那里并运行
  2. 确定瓶颈在哪里(使用分析器)
  3. 直接用 C++ 重写代码的那部分,从 C++/CLI 代码中调用它并确保它工作,再次分析它
  4. 用“#pragma unmanaged”包围你的新代码
  5. 再次分析它

您可能会获得一定程度的速度提升,但这可能不是您所期望的。

于 2013-01-30T14:31:49.000 回答
0

快速读取一些结构数组是可能的,但是因为这种技术需要 blittable 类型,所以唯一的方法是为 Key 制作一个固定的字节缓冲区,而不是使用字符串。

如果你这样做,你必须使用不安全的代码,所以它可能并不值得。

然而,仅仅出于好奇,下面展示如何对这些结构进行超快速的读写,代价是必须允许不安全代码,并且要做大量繁琐的处理:

using System;
using System.ComponentModel;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.InteropServices;


namespace Demo
{
    /// <summary>
    /// Demonstrates bulk reading and writing of an array of structs by pinning
    /// the array and passing its address directly to the Win32
    /// <c>ReadFile</c>/<c>WriteFile</c> functions.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Record with an inline 96-byte key buffer followed by a 64-bit offset
        /// and a 32-bit count. The key is a fixed byte buffer rather than a
        /// string so that an array of these structs can be pinned.
        /// </summary>
        /// <remarks>
        /// NOTE(review): confirm that <c>Marshal.SizeOf(typeof(PreIndexStruct))</c>
        /// matches the record size actually used in the data files.
        /// </remarks>
        [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Size = 108)]
        public struct PreIndexStruct
        {
            public unsafe fixed byte Key[96];
            public long Offset;
            public int Count;
        }

        /// <summary>
        /// Fills 100 records, writes them to a test file with
        /// <see cref="FastWrite{T}"/>, reads them back with
        /// <see cref="FastRead{T}"/> and prints the count and the first
        /// ten key bytes of each record.
        /// </summary>
        private static void Main(string[] args)
        {
            PreIndexStruct[] a = new PreIndexStruct[100];

            for (int i = 0; i < a.Length; ++i)
            {
                a[i].Count = i;

                unsafe
                {
                    // The fixed buffer lives inside an array element, so it has
                    // to be pinned before its bytes can be addressed.
                    fixed (byte* key = a[i].Key)
                    {
                        for (int j = 0; j < 10; ++j)
                        {
                            key[j] = (byte)i;
                        }
                    }
                }
            }

            using (var output = File.Create(@"C:\TEST\TEST.BIN"))
            {
                FastWrite(output, a, 0, a.Length);
            }

            using (var input = File.OpenRead(@"C:\TEST\TEST.BIN"))
            {
                var b = FastRead<PreIndexStruct>(input, a.Length);

                for (int i = 0; i < b.Length; ++i)
                {
                    Console.Write("Count = " + b[i].Count + ", Key =");

                    unsafe
                    {
                        fixed (byte* key = b[i].Key)
                        {
                            // Here you would access the bytes in Key[], which would presumably be ANSI chars.

                            for (int j = 0; j < 10; ++j)
                            {
                                Console.Write(" " + key[j]);
                            }
                        }
                    }

                    Console.WriteLine();
                }
            }
        }

        /// <summary>
        /// Writes a part of an array to a file stream as quickly as possible,
        /// without making any additional copies of the data.
        /// </summary>
        /// <typeparam name="T">The type of the array elements.</typeparam>
        /// <param name="fs">The file stream to which to write.</param>
        /// <param name="array">The array containing the data to write.</param>
        /// <param name="offset">The offset of the start of the data in the array to write.</param>
        /// <param name="count">The number of array elements to write.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> or <paramref name="array"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="offset"/> or <paramref name="count"/> is negative, or the
        /// requested byte count does not fit in a single WriteFile call.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="offset"/> plus <paramref name="count"/> exceeds the array length.
        /// </exception>
        /// <exception cref="IOException">Thrown on error. See inner exception for <see cref="Win32Exception"/></exception>
        [SuppressMessage("Microsoft.Reliability", "CA2001:AvoidCallingProblematicMethods", MessageId="System.Runtime.InteropServices.SafeHandle.DangerousGetHandle")]
        public static void FastWrite<T>(FileStream fs, T[] array, int offset, int count) where T: struct
        {
            if (fs == null)
            {
                throw new ArgumentNullException("fs");
            }

            if (array == null)
            {
                throw new ArgumentNullException("array");
            }

            if (offset < 0)
            {
                throw new ArgumentOutOfRangeException("offset");
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            // Without this check WriteFile would be handed memory beyond the
            // end of the pinned array.
            if (count > array.Length - offset)
            {
                throw new ArgumentException("offset and count exceed the bounds of the array.");
            }

            if (count == 0)
            {
                // Nothing to write: skip pinning and the native call.
                return;
            }

            int sizeOfT = Marshal.SizeOf(typeof(T));

            // 64-bit arithmetic: element count times element size can exceed
            // the range of a 32-bit int.
            long byteOffset = (long)offset * sizeOfT;
            long byteCount  = (long)count * sizeOfT;

            if (byteCount > uint.MaxValue)
            {
                throw new ArgumentOutOfRangeException("count", "The requested data is too large for a single write.");
            }

            GCHandle gcHandle = GCHandle.Alloc(array, GCHandleType.Pinned);

            try
            {
                uint bytesWritten;
                uint bytesToWrite = (uint)byteCount;

                if
                (
                    !WriteFile
                    (
                        fs.SafeFileHandle.DangerousGetHandle(),
                        new IntPtr(gcHandle.AddrOfPinnedObject().ToInt64() + byteOffset),
                        bytesToWrite,
                        out bytesWritten,
                        IntPtr.Zero
                    )
                )
                {
                    throw new IOException("Unable to write file.", new Win32Exception(Marshal.GetLastWin32Error()));
                }

                Debug.Assert(bytesWritten == bytesToWrite);
            }

            finally
            {
                gcHandle.Free();
            }
        }

        /// <summary>
        /// Reads array data from a file stream as quickly as possible,
        /// without making any additional copies of the data.
        /// </summary>
        /// <typeparam name="T">The type of the array elements.</typeparam>
        /// <param name="fs">The file stream from which to read.</param>
        /// <param name="count">The number of elements to read.</param>
        /// <returns>
        /// The array of elements that was read. This may be less than the number that was
        /// requested if the end of the file was reached. It may even be empty.
        /// NOTE: There may still be data left in the file, even if not all the requested
        /// elements were returned - this happens if the number of bytes remaining in the
        /// file is less than the size of the array elements.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="count"/> is negative, or the available data does not fit in a
        /// single ReadFile call.
        /// </exception>
        /// <exception cref="IOException">Thrown on error. See inner exception for <see cref="Win32Exception"/></exception>
        [SuppressMessage("Microsoft.Reliability", "CA2001:AvoidCallingProblematicMethods", MessageId="System.Runtime.InteropServices.SafeHandle.DangerousGetHandle")]
        public static T[] FastRead<T>(FileStream fs, int count) where T: struct
        {
            if (fs == null)
            {
                throw new ArgumentNullException("fs");
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException("count");
            }

            int sizeOfT = Marshal.SizeOf(typeof(T));

            // Clamp at zero in case the stream position is beyond the end of the file.
            long bytesRemaining  = Math.Max(0L, fs.Length - fs.Position);

            // 64-bit arithmetic: count * sizeOfT can exceed the range of a 32-bit int.
            long wantedBytes     = (long)count * sizeOfT;
            long bytesAvailable  = Math.Min(bytesRemaining, wantedBytes);
            long availableValues = bytesAvailable / sizeOfT;
            long bytesToRead     = (availableValues * sizeOfT);

            if ((bytesRemaining < wantedBytes) && ((bytesRemaining - bytesToRead) > 0))
            {
                Debug.WriteLine("Requested data exceeds available data and partial data remains in the file.", "Dmr.Common.IO.Arrays.FastRead(fs,count)");
            }

            if (availableValues == 0)
            {
                // Nothing to read: skip pinning and the native call.
                return new T[0];
            }

            if (bytesToRead > uint.MaxValue)
            {
                throw new ArgumentOutOfRangeException("count", "The requested data is too large for a single read.");
            }

            T[] result = new T[availableValues];

            GCHandle gcHandle = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                uint bytesRead = 0;

                if
                (
                    !ReadFile
                    (
                        fs.SafeFileHandle.DangerousGetHandle(),
                        gcHandle.AddrOfPinnedObject(),
                        (uint)bytesToRead,
                        out bytesRead,
                        IntPtr.Zero
                    )
                )
                {
                    throw new IOException("Unable to read file.", new Win32Exception(Marshal.GetLastWin32Error()));
                }

                Debug.Assert(bytesRead == bytesToRead);
            }

            finally
            {
                gcHandle.Free();
            }

            return result;
        }


        /// <summary>See the Windows API documentation for details.</summary>
        [SuppressMessage("Microsoft.Interoperability", "CA1415:DeclarePInvokesCorrectly")]
        [DllImport("kernel32.dll", SetLastError=true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static extern bool ReadFile
        (
            IntPtr hFile,
            IntPtr lpBuffer,
            uint nNumberOfBytesToRead,
            out uint lpNumberOfBytesRead,
            IntPtr lpOverlapped
        );

        /// <summary>See the Windows API documentation for details.</summary>
        [SuppressMessage("Microsoft.Interoperability", "CA1415:DeclarePInvokesCorrectly")]
        [DllImport("kernel32.dll", SetLastError=true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static extern bool WriteFile
        (
            IntPtr hFile,
            IntPtr lpBuffer,
            uint nNumberOfBytesToWrite,
            out uint lpNumberOfBytesWritten,
            IntPtr lpOverlapped
        );
    }
}
于 2013-01-30T14:42:56.687 回答