0

我编写了一个程序来在 Ubuntu 中用 C 语言分割文件。在读取文件中获取缓冲区时出现错误。这是我的代码。

    int split(char *filename, unsigned long part) {
        FILE *fp;
        char *buffer;
        size_t result; // bytes read
        off_t fileSize;

        fp = fopen(filename, "rb");
        if (fp == NULL) {
            fprintf(stderr, "Cannot Open %s", filename);
            exit(2);
        }
    // Get Size
        fileSize = get_file_size(filename);
    // Buffer
        buffer = (char*) malloc(sizeof(char) * (fileSize + 1));
        if (buffer == NULL) {
            fputs("Memory error", stderr);
            fclose(fp);
            return 1;
        }
    // Copy file into buffer
    //char buffers[11];
        result = fread(buffer, 1, fileSize, fp);
        buffer[fileSize] = '\0';

        if (result != fileSize) {
            fputs("Reading error", stderr);
            return 1;
        }

    // Split file
        off_t partSize = fileSize / part;
    // Last Part
        off_t lastPartSize = fileSize - partSize * part;
        unsigned long i;
        unsigned long j;
        // create part 1 to n-1
        for (j = 0; j < part; j++) {
            char partName[255];
            char *content;
            char partNumber[3];
            // Content of file part
    //      for (i = j; i < partSize * (j + 1); i++) {
    //
    //      }
            content = (char*) malloc(sizeof(char) * partSize);
            content = copychar(buffer, j + i, partSize + i);
            i += partSize;
            //copy name
            strcpy(partName, filename);
            // part Number
            sprintf(partNumber, "%d", j);
            // file name with .part1 2 3 4 ....
            strcat(partName, ".part");
            strcat(partName, partNumber);
            // Write to file
            writeFile(partName, content);
            free(content);
        }
    // last part
    char *content;
    content = (char*) malloc(sizeof(char) * (fileSize - partSize * (part - 1)));
    content = copychar(buffer, (part - 1) * partSize + 1, fileSize);
    char lastPartNumber[3];
    char lastPartName[255];
    sprintf(lastPartNumber, "%d", part);
    strcpy(lastPartName, filename);
    strcat(lastPartName, ".part");
    strcat(lastPartName, lastPartNumber);
    writeFile(lastPartName, content);
    free(content);

    free(buffer);
    fclose(fp);
    return 0;    
}

这是从头到尾的函数copychar

char *copychar(char* buffer, unsigned long start, unsigned long end) {
    if (start >= end)
        return NULL;
    char *result;
    result = (char*) malloc(sizeof(char) * (end - start) + 1);
    unsigned long i;
    for (i = start; i <= end; i++)
        result[i] = buffer[i];
    result[end] = '\0';
    return result;
}

这是获取文件大小的功能

off_t get_file_size(char *filename) {
    struct stat st;
    if (stat(filename, &st) == 0)
        return st.st_size;
    fprintf(stderr, "Cannot determine size of %s: %s\n", filename);
    return -1;
}

这是写文件的功能

    int writeFile(char* filename, char*buffer) {
    if (buffer == NULL || filename == NULL)
        return 1;
    FILE *file;
    file = fopen(filename, "wb");
    fwrite(buffer, sizeof(char), sizeof(buffer) + 1, file);
    fclose(file);
    return 0;
}

当我测试时,我使用文件测试 29MB 并且它被转储了。我调试它返回 fileSize true 但是当缓冲区中的 readfile 从文件中获取时它只返回 135 个字符并且当使用 copychar 时它出错。

    Breakpoint 1, 0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary   1342169369  0     0     100644  4         `\n2.0\ncontrol.tar.gz  1342169369  0     0     100644  4557      `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43

Program received signal SIGSEGV, Segmentation fault.
0x0000000000400a0b in copychar (buffer=0x7ffff5e3a010 "!<arch>\ndebian-binary   1342169369  0     0     100644  4         `\n2.0\ncontrol.tar.gz  1342169369  0     0     100644  4557      `\n\037\213\b", start=4154703576, end=4164450461) at final.c:43

程序因信号 SIGSEGV、分段错误而终止。该程序不再存在。

我不知道如何在拆分时将缓冲区分成部分以写入部分。感谢提前!

4

2 回答 2

1

您可能已经注意到,将文件复制到 1 个大块中是非常不切实际的。这不是必需的。

在最简单的级别,您可以逐字节复制文件,如下所示

while( ( ch = fgetc(source) ) != EOF ) {
   fputc(ch, target);
}

这会起作用,但它会很慢。最好在块中复制,如下所示:

 unsigned char buf[4096];
 size_t size;
 while( (size = fread(buf, 1, sizeof(buf), fpRead) ) > 0) {
     fwrite(buf, 1, size, fpWrite);
 }

请注意,生成的代码更简单,并且不包含动态内存分配。

当然,您仍然需要添加拆分逻辑,但这可以通过跟踪写入的字节数并在实际写入之前打开一个新的写入文件来完成。

编辑:如何处理多部分方面 - 示意性地,您仍然需要对一些特殊情况和不同系统调用的测试结果进行额外检查

 unsigned char buf[4096];
 size_t size;
 size_t partsize = 100000; // asssuming you want to write 100k parts.
 size_t stilltobewritten = partsize; // bytes remaining to be written in current part
 size_t chunksize = sizeof(buf); // first time around we read full buffersize
 while( (size = fread(buf, 1, chunksize, fpRead) ) > 0) {
     fwrite(buf, 1, size, fpWrite);
     stilltobewritten -= size; // subtract bytes written from saldo
     if (stilltobewritten == 0) {
         // part is complete, close this part and open next
         fclose(fpWrite);
         fpWrite = fopen(nextpart,"wb");
         // and reinit variables
         stilltobewritten = partsize;
         chunksize = sizeof(buf);
     } else {
         // prep next round on present file - just the special case of the last block
         // to handle
         chunksize = (stilltobewritten > sizeof(buf)) ? sizeof(buf) : stilltobewritten;
     }
 }

和编辑 2:文件部分名称也可以变得更简单:

 sprintf(partName, "%s.part%d",file, j);
于 2012-07-28T15:08:51.730 回答
1

关于原始代码,copychar 中有一些start混淆end。首先,您的意思可能sizeof(char) * (end - start + 1)不是sizeof(char) * (end - start) + 1在 malloc 中,其次,您从原始缓冲区 ( for (i = start; i <= end; i++)) 复制 end-start+1 符号,然后用 覆盖最后一个符号'\0',这可能不是预期的行为。

于 2012-07-28T15:13:37.717 回答