1

我有一个十六进制文件,如下所示。

第1部分

1F 00 1C 3A 1F 00 25 3A 1F 00 09 3A 1F 00 50 3A 
1F 00 5A 3A 1F 00 5C 3A 1F 00 5B 3A 1F 00 59 3A
1F 00 5D 3A 03 00 FE 0F 1F 00 01 30 1F 00 06 3A 
1F 00 11 3A 1F 00 44 3A 1F 00 4F 3A 1F 00 45 3A 
1F 10 56 3A 1F 10 54 3A 1F 00 03 30 1F 00 02 30 
03 00 55 3A 03 00 71 3A 1F 00 29 3A 1F 00 27 3A 
1F 00 2A 3A 1F 00 28 3A 1F 00 26 3A 1F 00 51 3A 
1F 00 08 3A 1F 00 24 3A 1F 00 21 3A 1F 00 16 3A
1F 00 17 3A 1F 00 18 3A 1F 00 19 3A 1F 00 0A 80
1F 00 48 3A 1F 10 58 3A 02 00 4D 3A 40 00 42 3A 
40 00 41 3A 1F 00 04 30 1F 10 00 80 03 00 01 80
02 01 FF 0F

第2部分

40 00 08 30 03 00 71 3A 03 00 55 3A 1F 00 02 30 
1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 06 3A 
1F 00 01 30 03 00 FE 0F 02 01 FF 0F

第 3 部分

40 00 08 30 03 00 71 3A 03 00 55 3A 1F 00 02 30 
1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 11 3A 
1F 00 06 3A 1F 00 01 30 03 00 FE 0F 02 01 FF 0F 

第 4 部分

1F 00 5D 3A 03 00 71 3A 03 00 55 3A 1F 00 02 30 
1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 45 3A 
1F 00 4F 3A 1F 00 44 3A 1F 00 11 3A 1F 00 06 3A 
1F 00 01 30 02 01 FF 0F

第 5 部分

40 00 08 30 1F 00 03 30 1F 00 02 30 1F 00 01 30 
02 01 FF 0F

我的文件中有很多像上面这样的数据部分。每个数据部分都以字节 02 01 FF 0F 结尾。这里我只展示了五个部分。

每次我都不知道每个数据部分有多少字节可用。所有部分都放在一个文件中。我想读取所有部分并将所有部分存储在单独的内存中,即数组或链表,以便我能够访问所有字节。

我的代码如下:

/* Asker's attempt: walk the file one 4-byte word at a time, looking for the
 * end-of-section word 0x0FFF0102 (the bytes 02 01 FF 0F loaded as a
 * little-endian 32-bit value).
 * NOTE(review): `a` and `pFile` are declared outside this fragment;
 * `a` is presumably a 32-bit unsigned integer -- confirm. */
int n = 500;     //where n is the number of parts in my file
for(int i = 0; i < n; i++)
{
  /* NOTE(review): rewind() moves back to the start of the file on every
   * iteration, so each pass begins at the first word again. */
  rewind(pFile);
  /* Read the first 4-byte word; the fread() return value is not checked. */
  fread(&a, 1, 4, pFile);
  /* Skip this iteration when the word is the end marker or one of two
   * other specific values (their meaning is not shown in this fragment). */
  if((a==0x0FFF0102) || (a==0x8004001F) || (a==0x800D001F))
  {
    continue;
  }
  /* Step back over the word just read so the loop below re-reads it. */
  fseek(pFile, -4, SEEK_CUR);
  /* Consume words until the end-of-section marker has been read.
   * NOTE(review): there is no end-of-file check, so a file without the
   * marker would keep this loop running. */
  while(a!=0x0FFF0102)
  {
    fread(&a, 1, 4, pFile);
    // now  what can I do here. where I store all above hex data.

  }
}
4

3 回答 3

2

如果您的文件比您机器上的可用内存小得多,这个答案就成立了。

  1. 使用 calloc() 分配一个您认为足以容纳整个文件的大缓冲区
  2. 开始按块读取文件。
    • 记录你读了多少。
  3. 将读取的内容放入分配的缓冲区。
    • 如果缓冲区无法容纳读取的数据,请使用realloc()扩大缓冲区
  4. 阅读整个文件后,创建另一个数组,这次是char*指针 - 用于零件起始列表
  5. 将指向读取缓冲区开头的指针添加到部件开始列表
  6. 开始扫描读取缓冲区,搜索部分结束字节 02 01 FF 0F
  7. 将每个找到的字节序列之后的位置添加到部件开始列表中。同样,根据需要重新分配。
  8. 如果文件是文本文件,则用 '\0' 替换结束字节;否则什么也不做
  9. 重复直到到达缓冲区的末尾
  10. 将部件开始列表中的每个条目视为字符串并对其执行操作。它的结束由下一个条目的开始标记。
于 2013-10-23T12:20:24.200 回答
1

我正在根据Dariusz 的回答建立我的回答。通常我会把它放在评论中,但我写得太多了。

问题在于,根据程序对内存的一般使用情况,使用malloc()较小的内存块可能会更成功。

  1. 用 malloc() 分配足够的内存来存放指针列表。
  2. 逐块读取到某个临时缓冲区中。
  3. 一旦你有一个完整的块,malloc()它的内存并将数据放入。记住在某处记下大小。
  4. 将该指针放入指针列表中,并根据需要用 realloc() 扩大该列表。

Dariusz 的解决方案有一些优点——例如,您可以通过指针相减来获得块的大小——但是,如前所述,为整个文件分配一个大内存块可能会导致问题,具体取决于文件大小。


另一种解决方案是使用mmap(),它允许您在内存中映射磁盘文件。

这为您提供了一个指向虚拟内存的指针,您可以在其中准确地从磁盘文件中找到字节。

在这种情况下,从第 4 点开始,更接近 Dariusz 的答案。

于 2013-10-24T05:33:59.623 回答
1

大锤,遇见坚果;坚果,大锤。

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "imageprt.h"
#include "stderr.h"

/* End-of-section marker: the byte sequence 02 01 FF 0F as it appears when
 * loaded into a 32-bit integer on a little-endian host. */
enum { EOS_MARKER = 0x0FFF0102 };

/* One section of the input file: a view into the Description's data buffer
 * (the section does not own the memory it points at). */
typedef struct Section
{
    /* Number of int32_t values in the section, including the trailing marker */
    size_t   length;
    /* First value of the section, inside Description.data */
    int32_t *data;
} Section;

/* In-memory description of a whole file: the raw contents plus an index of
 * the sections found in it. Released with free_description(). */
typedef struct Description
{
    /* Number of entries in sections[] */
    size_t  n_sections;
    /* Array of n_sections section views into data */
    Section *sections;
    /* Entire file contents, read as 32-bit values */
    int32_t *data;
} Description;

/* Release a Description and the buffers it owns. */
static void free_description(Description *dp);
/* Load the file open on fp (named fn for diagnostics) and index its sections. */
static Description *read_description(FILE *fp, char const *fn);
/* Print every section of desc to standard output, labelled with tag. */
static void dump_description(char const *tag, Description const *desc);

/*
 * Process each file named on the command line: read it, print its sections,
 * and release the memory.
 *
 * Returns EXIT_SUCCESS when every file was processed, EXIT_FAILURE when at
 * least one file could not be opened or read_description() rejected it.
 */
int main(int argc, char **argv)
{
    int status = EXIT_SUCCESS;

    /* Declared in stderr.h; presumably records the program name for messages */
    err_setarg0(argv[0]);
    for (int i = 1; i < argc; i++)
    {
        FILE *fp = fopen(argv[i], "rb");
        if (fp == NULL)
        {
            err_sysrem("Failed to open file %s for reading\n", argv[i]);
            status = EXIT_FAILURE;
            continue;
        }

        Description *desc = read_description(fp, argv[i]);
        fclose(fp);

        /*
         * read_description() returns a null pointer when it rejects the
         * file; dump_description() and free_description() both assert on a
         * null pointer, so skip them in that case.
         */
        if (desc == NULL)
        {
            status = EXIT_FAILURE;
            continue;
        }

        dump_description("Description", desc);
        free_description(desc);
    }
    return status;
}

/*
 * Print a description to standard output: the tag with the description's
 * address, the section count, and then each section via image_print(),
 * labelled with its byte offset from the start of the file data.
 */
static void dump_description(char const *tag, Description const *desc)
{
    assert(desc != 0);
    printf("%s: %p\n", tag, (void *)desc);
    printf("Number of sections: %zu\n", desc->n_sections);

    /* Nothing more to show (or to validate) when there are no sections */
    if (desc->n_sections == 0)
        return;

    assert(desc->sections != 0);
    assert(desc->data != 0);

    size_t sect_no = 0;
    while (sect_no < desc->n_sections)
    {
        Section const *sect = &desc->sections[sect_no];
        /* Distance from the start of the buffer, converted to bytes */
        size_t byte_offset = (sect->data - desc->data) * sizeof(int32_t);
        size_t byte_length = sect->length * sizeof(int32_t);

        printf("Section %zu:\n", sect_no);
        image_print(stdout, byte_offset, (char *)sect->data, byte_length);
        sect_no++;
    }
}

/*
 * Release a description: the raw data buffer, the section index, and
 * finally the description structure itself. dp must not be null.
 */
static void free_description(Description *dp)
{
    assert(dp != NULL);

    /* The two buffers are independent; the owner goes last */
    free(dp->data);
    free(dp->sections);
    free(dp);
}

/*
 * Read the whole of the file open on fp into memory and index its sections.
 *
 * Each section ends with the 32-bit value EOS_MARKER. The comparison is made
 * on int32_t values loaded directly from the file, so it matches the bytes
 * 02 01 FF 0F only on a little-endian host.
 *
 * fp - stream opened for binary reading; it must be seekable.
 * fn - file name, used only in diagnostics.
 *
 * Returns a newly allocated Description that the caller releases with
 * free_description(). An empty file yields a description with zero sections.
 * Returns a null pointer if the file length is not a multiple of
 * sizeof(int32_t). All other failures are reported through err_syserr().
 */
static Description *read_description(FILE *fp, char const *fn)
{
    /* Determine the file size; ftell() reports failure as -1L */
    if (fseek(fp, 0L, SEEK_END) != 0)
        err_syserr("Failed to seek to end of file %s\n", fn);
    long file_size = ftell(fp);
    if (file_size < 0)
        err_syserr("Failed to determine size of file %s\n", fn);
    if (fseek(fp, 0L, SEEK_SET) != 0)
        err_syserr("Failed to seek to start of file %s\n", fn);

    size_t n_bytes = (size_t)file_size;
    if (n_bytes % sizeof(int32_t) != 0)
    {
        err_remark("Length of file (%zu) is not a multiple of %zu bytes\n",
                n_bytes, sizeof(int32_t));
        return 0;
    }

    Description *desc = calloc(1, sizeof(*desc));
    if (desc == 0)
        err_syserr("Failed to allocate memory\n");

    /*
     * An empty file has no sections. Return the zeroed description rather
     * than allocating zero bytes and indexing sections[-1].
     */
    if (n_bytes == 0)
        return desc;

    desc->data = malloc(n_bytes);
    if (desc->data == 0)
        err_syserr("Failed to allocate memory\n");

    size_t n_read = fread(desc->data, 1, n_bytes, fp);
    if (n_read != n_bytes)
        err_syserr("Short read on file %s\n", fn);

    size_t n_values = n_bytes / sizeof(int32_t);

    /*
     * Validate the trailing marker before building the index. With it in
     * place, every section is terminated and the number of sections equals
     * the number of markers, so the index can never be overrun.
     */
    if (desc->data[n_values - 1] != EOS_MARKER)
        err_syserr("The file %s did not finish with the section marker!\n", fn);

    /* All data in memory: count the sections (at least one, see above) */
    size_t n_sects = 0;
    for (size_t i = 0; i < n_values; i++)
    {
        if (desc->data[i] == EOS_MARKER)
            n_sects++;
    }

    /* calloc() also guards the count * size multiplication against overflow */
    desc->sections = calloc(n_sects, sizeof(*desc->sections));
    if (desc->sections == 0)
        err_syserr("Failed to allocate memory\n");

    /* Each marker closes the section that began at index 'start' */
    size_t sec_num = 0;
    size_t start = 0;
    for (size_t i = 0; i < n_values; i++)
    {
        if (desc->data[i] == EOS_MARKER)
        {
            desc->sections[sec_num].data = &desc->data[start];
            desc->sections[sec_num].length = i + 1 - start;  /* marker included */
            sec_num++;
            start = i + 1;
        }
    }
    assert(sec_num == n_sects);
    desc->n_sections = n_sects;

    return desc;
}

头文件 `imageprt.h` 声明了 `image_print()`,这是我个人库中用于格式化十六进制转储的函数。头文件 `stderr.h` 定义了错误报告函数,例如 `err_remark()` 和 `err_syserr()`(前者报告消息后继续执行;后者报告消息和系统错误后终止程序)。

该代码将整个文件放入一块内存中,然后将其划分为多个部分。该Description结构包含描述。代码在读取时扫描数据两次(并再次打印)。如果文件将是多个千兆字节,则最好一次构建部分列表。您还可以考虑对文件进行内存映射。

输入数据的十六进制转储

0x0000: 1F 00 1C 3A 1F 00 25 3A 1F 00 09 3A 1F 00 50 3A   ...:..%:...:..P:
0x0010: 1F 00 5A 3A 1F 00 5C 3A 1F 00 5B 3A 1F 00 59 3A   ..Z:..\:..[:..Y:
0x0020: 1F 00 5D 3A 03 00 FE 0F 1F 00 01 30 1F 00 06 3A   ..]:.......0...:
0x0030: 1F 00 11 3A 1F 00 44 3A 1F 00 4F 3A 1F 00 45 3A   ...:..D:..O:..E:
0x0040: 1F 10 56 3A 1F 10 54 3A 1F 00 03 30 1F 00 02 30   ..V:..T:...0...0
0x0050: 03 00 55 3A 03 00 71 3A 1F 00 29 3A 1F 00 27 3A   ..U:..q:..):..':
0x0060: 1F 00 2A 3A 1F 00 28 3A 1F 00 26 3A 1F 00 51 3A   ..*:..(:..&:..Q:
0x0070: 1F 00 08 3A 1F 00 24 3A 1F 00 21 3A 1F 00 16 3A   ...:..$:..!:...:
0x0080: 1F 00 17 3A 1F 00 18 3A 1F 00 19 3A 1F 00 0A 80   ...:...:...:....
0x0090: 1F 00 48 3A 1F 10 58 3A 02 00 4D 3A 40 00 42 3A   ..H:..X:..M:@.B:
0x00A0: 40 00 41 3A 1F 00 04 30 1F 10 00 80 03 00 01 80   @.A:...0........
0x00B0: 02 01 FF 0F 40 00 08 30 03 00 71 3A 03 00 55 3A   ....@..0..q:..U:
0x00C0: 1F 00 02 30 1F 00 03 30 1F 10 54 3A 1F 10 56 3A   ...0...0..T:..V:
0x00D0: 1F 00 06 3A 1F 00 01 30 03 00 FE 0F 02 01 FF 0F   ...:...0........
0x00E0: 40 00 08 30 03 00 71 3A 03 00 55 3A 1F 00 02 30   @..0..q:..U:...0
0x00F0: 1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 11 3A   ...0..T:..V:...:
0x0100: 1F 00 06 3A 1F 00 01 30 03 00 FE 0F 02 01 FF 0F   ...:...0........
0x0110: 1F 00 5D 3A 03 00 71 3A 03 00 55 3A 1F 00 02 30   ..]:..q:..U:...0
0x0120: 1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 45 3A   ...0..T:..V:..E:
0x0130: 1F 00 4F 3A 1F 00 44 3A 1F 00 11 3A 1F 00 06 3A   ..O:..D:...:...:
0x0140: 1F 00 01 30 02 01 FF 0F 40 00 08 30 1F 00 03 30   ...0....@..0...0
0x0150: 1F 00 02 30 1F 00 01 30 02 01 FF 0F               ...0...0....
0x015C:

示例输出

Description: 0x7fc58bc03a20
Number of sections: 5
Section 0:
0x0000: 1F 00 1C 3A 1F 00 25 3A 1F 00 09 3A 1F 00 50 3A   ...:..%:...:..P:
0x0010: 1F 00 5A 3A 1F 00 5C 3A 1F 00 5B 3A 1F 00 59 3A   ..Z:..\:..[:..Y:
0x0020: 1F 00 5D 3A 03 00 FE 0F 1F 00 01 30 1F 00 06 3A   ..]:.......0...:
0x0030: 1F 00 11 3A 1F 00 44 3A 1F 00 4F 3A 1F 00 45 3A   ...:..D:..O:..E:
0x0040: 1F 10 56 3A 1F 10 54 3A 1F 00 03 30 1F 00 02 30   ..V:..T:...0...0
0x0050: 03 00 55 3A 03 00 71 3A 1F 00 29 3A 1F 00 27 3A   ..U:..q:..):..':
0x0060: 1F 00 2A 3A 1F 00 28 3A 1F 00 26 3A 1F 00 51 3A   ..*:..(:..&:..Q:
0x0070: 1F 00 08 3A 1F 00 24 3A 1F 00 21 3A 1F 00 16 3A   ...:..$:..!:...:
0x0080: 1F 00 17 3A 1F 00 18 3A 1F 00 19 3A 1F 00 0A 80   ...:...:...:....
0x0090: 1F 00 48 3A 1F 10 58 3A 02 00 4D 3A 40 00 42 3A   ..H:..X:..M:@.B:
0x00A0: 40 00 41 3A 1F 00 04 30 1F 10 00 80 03 00 01 80   @.A:...0........
0x00B0: 02 01 FF 0F                                       ....
Section 1:
0x00B4: 40 00 08 30 03 00 71 3A 03 00 55 3A 1F 00 02 30   @..0..q:..U:...0
0x00C4: 1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 06 3A   ...0..T:..V:...:
0x00D4: 1F 00 01 30 03 00 FE 0F 02 01 FF 0F               ...0........
Section 2:
0x00E0: 40 00 08 30 03 00 71 3A 03 00 55 3A 1F 00 02 30   @..0..q:..U:...0
0x00F0: 1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 11 3A   ...0..T:..V:...:
0x0100: 1F 00 06 3A 1F 00 01 30 03 00 FE 0F 02 01 FF 0F   ...:...0........
Section 3:
0x0110: 1F 00 5D 3A 03 00 71 3A 03 00 55 3A 1F 00 02 30   ..]:..q:..U:...0
0x0120: 1F 00 03 30 1F 10 54 3A 1F 10 56 3A 1F 00 45 3A   ...0..T:..V:..E:
0x0130: 1F 00 4F 3A 1F 00 44 3A 1F 00 11 3A 1F 00 06 3A   ..O:..D:...:...:
0x0140: 1F 00 01 30 02 01 FF 0F                           ...0....
Section 4:
0x0148: 40 00 08 30 1F 00 03 30 1F 00 02 30 1F 00 01 30   @..0...0...0...0
0x0158: 02 01 FF 0F  

是的,我的十六进制转储程序使用的正是本程序中的这个 image_print() 函数。

于 2013-10-24T14:08:39.020 回答