1

我已经开始使用 protobuf-net 库。它将序列化速度提高了 30%,但我对生成的文件大小有疑问。

我的数据模型是:

    /// <summary>
    /// One flat report row: ten 32-bit integer columns (tags 1-10), ten 64-bit
    /// integer columns (tags 11-20) and ten string columns (tags 21-30).
    /// Carries both the protobuf-net contract attributes and [Serializable],
    /// so the same instance can be written by either serializer.
    /// </summary>
    [Serializable]
    [ProtoContract(SkipConstructor = true)]
    private class ReportDataItem
    {
        // 32-bit integer columns.
        [ProtoMember(1)] public int C11 { get; set; }
        [ProtoMember(2)] public int C12 { get; set; }
        [ProtoMember(3)] public int C13 { get; set; }
        [ProtoMember(4)] public int C14 { get; set; }
        [ProtoMember(5)] public int C15 { get; set; }
        [ProtoMember(6)] public int C16 { get; set; }
        [ProtoMember(7)] public int C17 { get; set; }
        [ProtoMember(8)] public int C18 { get; set; }
        [ProtoMember(9)] public int C19 { get; set; }
        [ProtoMember(10)] public int C110 { get; set; }

        // 64-bit integer columns.
        [ProtoMember(11)] public long C21 { get; set; }
        [ProtoMember(12)] public long C22 { get; set; }
        [ProtoMember(13)] public long C23 { get; set; }
        [ProtoMember(14)] public long C24 { get; set; }
        [ProtoMember(15)] public long C25 { get; set; }
        [ProtoMember(16)] public long C26 { get; set; }
        [ProtoMember(17)] public long C27 { get; set; }
        [ProtoMember(18)] public long C28 { get; set; }
        [ProtoMember(19)] public long C29 { get; set; }
        [ProtoMember(20)] public long C210 { get; set; }

        // String columns.
        [ProtoMember(21)] public string C31 { get; set; }
        [ProtoMember(22)] public string C32 { get; set; }
        [ProtoMember(23)] public string C33 { get; set; }
        [ProtoMember(24)] public string C34 { get; set; }
        [ProtoMember(25)] public string C35 { get; set; }
        [ProtoMember(26)] public string C36 { get; set; }
        [ProtoMember(27)] public string C37 { get; set; }
        [ProtoMember(28)] public string C38 { get; set; }
        [ProtoMember(29)] public string C39 { get; set; }
        [ProtoMember(30)] public string C310 { get; set; }
    }

    /// <summary>
    /// Container for the report rows. The list is protobuf member 1 and is
    /// declared with <c>DataFormat.Group</c>.
    /// </summary>
    [Serializable, ProtoContract]
    private class ReportData
    {
        [ProtoMember(1, DataFormat = DataFormat.Group)]
        public List<ReportDataItem> ReportDataItems { get; set; }
    }

    /// <summary>
    /// Root object passed to the serializers; wraps a single
    /// <see cref="ReportData"/> as protobuf member 1.
    /// </summary>
    [Serializable, ProtoContract]
    private class Report
    {
        [ProtoMember(1)]
        public ReportData ReportData { get; set; }
    }

所以当我尝试序列化时:

    /// <summary>
    /// Builds a <see cref="Report"/> containing ten items and writes it to disk
    /// twice - once with protobuf-net (<c>0.bin</c>) and once with
    /// <see cref="BinaryFormatter"/> (<c>bf_0.bin</c>) - so the two file sizes
    /// can be compared.
    /// </summary>
    private static void ObjectSerialization()
    {
        // The same string instance is assigned to every string column of every item.
        const string someData = @"qtwretyfsjdabvfsjdlfudspogds;kfg;lkfdsl;gkl;dsfkgl;kdfsgr;iweprpo\z\xlvcfmxzcbvjiorsdifdlf\jl;dsa";

        Report report = new Report();
        report.ReportData = new ReportData { ReportDataItems = new List<ReportDataItem>() };

        for (int j = 0; j < 10; j++)
        {
            // Every numeric column of item j holds the loop index j.
            ReportDataItem reportDataItem = new ReportDataItem
            {
                C11 = j,
                C12 = j,
                C13 = j,
                C14 = j,
                C15 = j,
                C16 = j,
                C17 = j,
                C18 = j,
                C19 = j,
                C110 = j,

                C21 = j,
                C22 = j,
                C23 = j,
                C24 = j,
                C25 = j,
                C26 = j,
                C27 = j,
                C28 = j,
                C29 = j,
                C210 = j,

                C31 = someData,
                C32 = someData,
                C33 = someData,
                C34 = someData,
                C35 = someData,
                C36 = someData,
                C37 = someData,
                C38 = someData,
                C39 = someData,
                C310 = someData,
            };

            report.ReportData.ReportDataItems.Add(reportDataItem);
        }

        // protobuf-net output.
        using (Stream stream = new FileStream(@"c:\Test\Object\0.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            Serializer.Serialize(stream, report);
        }

        // BinaryFormatter output, kept only as the baseline being measured.
        // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted
        // input; do not carry it into production code.
        using (Stream stream = new FileStream(@"c:\Test\Object\bf_0.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            BinaryFormatter formatter = new BinaryFormatter();
            formatter.Serialize(stream, report);
        }
    }

我在下面提供了结果:

  • protobuf-net文件大小10428 字节
  • BinaryFormatter文件大小3458 字节

您能帮我找到减小结果 protobuf-net 文件大小的正确解决方案吗?我从 VS 包管理器作为包安装了 Protobuf-net。

4

1 回答 1

2

我将最后几行更改为:

// protobuf-net: print the stream length while it is still open,
// then the size of the file on disk once the stream has been disposed.
using (Stream pbStream = new FileStream(
    @"pb.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
{
    Serializer.Serialize(pbStream, report);
    Console.WriteLine(pbStream.Length);
}
Console.WriteLine(new FileInfo("pb.bin").Length);

// BinaryFormatter: same two measurements for comparison.
using (Stream bfStream = new FileStream(
    @"bf.bin", FileMode.Create, FileAccess.Write, FileShare.Write))
{
    var formatter = new BinaryFormatter();
    formatter.Serialize(bfStream, report);
    Console.WriteLine(bfStream.Length);
}
Console.WriteLine(new FileInfo("bf.bin").Length);

获取写入流的数据量,以及文件的最终大小。我的结果:

1628
1628
3144
3144

这对我来说看起来不错。请验证您的数据。

您使用的字符串是否可能比“某些数据”更大?如果是这样,那么有一个重要的问题:您的实际数据中是否可能存在重复的字符串?如果不存在,则 BF 测试无效,因为默认情况下它会使用引用跟踪,因此相同的字符串只存储一次——但您的真实数据的行为会非常不同。如果您确实多次使用相同的字符串,那么可以在 protobuf-net 中模仿这种重用:

// String columns (tags 21-30), each declared with AsReference = true.
[ProtoMember(21, AsReference = true)] public string C31 { get; set; }
[ProtoMember(22, AsReference = true)] public string C32 { get; set; }
[ProtoMember(23, AsReference = true)] public string C33 { get; set; }
[ProtoMember(24, AsReference = true)] public string C34 { get; set; }
[ProtoMember(25, AsReference = true)] public string C35 { get; set; }
[ProtoMember(26, AsReference = true)] public string C36 { get; set; }
[ProtoMember(27, AsReference = true)] public string C37 { get; set; }
[ProtoMember(28, AsReference = true)] public string C38 { get; set; }
[ProtoMember(29, AsReference = true)] public string C39 { get; set; }
[ProtoMember(30, AsReference = true)] public string C310 { get; set; }

现在的输出:

939
939
3144
3144

然而!如果字符串通常不重复,这将略微增加输出,并且会使其他 protobuf 实现难以使用它(它是有效的 protobuf 数据,但用了一些“巫术”)。

例如,如果您有自定义名称/国家/地区名称/状态等以字符串表示但大量重复的内容,则上述做法很有用。

于 2013-01-28T13:12:20.577 回答