这是两个独立的问题,两者都是可以解决的。
首先是如何附加到 tar 文件。您需要做的就是用您的文件覆盖最后两个归零的 512 字节块。您将编写 512 字节的 tar 标头,将您的文件四舍五入为 512 字节块的整数,然后用 0 填充两个 512 字节块以标记 tar 文件的新结尾。
第二个是如何频繁地追加到一个 gzip 文件。最简单的方法是编写单独的 gzip 流并将它们连接起来。将最后两个 512 字节的零块写入单独的 gzip 流中,并记住从哪里开始。然后用带有新 tar 条目的新 gzip 流覆盖它,然后用两个末端块覆盖另一个 gzip 流。这可以通过在文件中搜索lseek()
然后使用gzdopen()
从那里开始写入来完成。
对于添加的大文件(至少 10 的 K),这将工作得很好,压缩效果很好。但是,如果您要添加非常小的文件,则简单地连接小型 gzip 流将导致糟糕的压缩,或者更糟的是,扩展。您可以做一些更复杂的事情,将少量数据实际添加到单个 gzip 流中,以便压缩算法可以利用前面的数据进行关联和字符串匹配。为此,请查看zlib发行版中gzlog.h和gzlog.c中examples/
的方法。
以下是如何执行简单方法的示例:
/* tapp.c -- Example of how to append to a tar.gz file with concatenated gzip
streams. Placed in the public domain by Mark Adler, 16 Jan 2013. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <fcntl.h>
#include "zlib.h"
#define local static
/* Build an allocated string with the prefix string and the NULL-terminated
sequence of words strings separated by spaces. The caller should free the
returned string when done with it. */
local char *build_cmd(char *prefix, char **words)
{
size_t len;
char **scan;
char *str, *next;
len = strlen(prefix) + 1;
for (scan = words; *scan != NULL; scan++)
len += strlen(*scan) + 1;
str = malloc(len); assert(str != NULL);
next = stpcpy(str, prefix);
for (scan = words; *scan != NULL; scan++) {
*next++ = ' ';
next = stpcpy(next, *scan);
}
return str;
}
/* Usage:
tapp archive.tar.gz addthis.file andthisfile.too
tapp will create a new archive.tar.gz file if it doesn't exist, or it will
append the files to the existing archive.tar.gz. tapp must have been used
to create the archive in the first place. If it did not, then tapp will
exit with an error and leave the file unchanged. Each use of tapp appends a
new gzip stream whose compression cannot benefit from the files already in
the archive. As a result, tapp should not be used to append a small amount
of data at a time, else the compression will be particularly poor. Since
this is just an instructive example, the error checking is done mostly with
asserts.
*/
int main(int argc, char **argv)
{
int tgz;
off_t offset;
char *cmd;
FILE *pipe;
gzFile gz;
int page;
size_t got;
int ret;
ssize_t raw;
unsigned char buf[3][512];
const unsigned char z1k[] = /* gzip stream of 1024 zeros */
{0x1f, 0x8b, 8, 0, 0, 0, 0, 0, 2, 3, 0x63, 0x60, 0x18, 5, 0xa3, 0x60,
0x14, 0x8c, 0x54, 0, 0, 0x2e, 0xaf, 0xb5, 0xef, 0, 4, 0, 0};
if (argc < 2)
return 0;
tgz = open(argv[1], O_RDWR | O_CREAT, 0644); assert(tgz != -1);
offset = lseek(tgz, 0, SEEK_END); assert(offset == 0 || offset >= (off_t)sizeof(z1k));
if (offset) {
if (argc == 2) {
close(tgz);
return 0;
}
offset = lseek(tgz, -sizeof(z1k), SEEK_END); assert(offset != -1);
raw = read(tgz, buf, sizeof(z1k)); assert(raw == sizeof(z1k));
if (memcmp(buf, z1k, sizeof(z1k)) != 0) {
close(tgz);
fprintf(stderr, "tapp abort: %s was not created by tapp\n", argv[1]);
return 1;
}
offset = lseek(tgz, -sizeof(z1k), SEEK_END); assert(offset != -1);
}
if (argc > 2) {
gz = gzdopen(tgz, "wb"); assert(gz != NULL);
cmd = build_cmd("tar cf - -b 1", argv + 2);
pipe = popen(cmd, "r"); assert(pipe != NULL);
free(cmd);
got = fread(buf, 1, 1024, pipe); assert(got == 1024);
page = 2;
while ((got = fread(buf[page], 1, 512, pipe)) == 512) {
if (++page == 3)
page = 0;
ret = gzwrite(gz, buf[page], 512); assert(ret == 512);
} assert(got == 0);
ret = pclose(pipe); assert(ret != -1);
ret = gzclose(gz); assert(ret == Z_OK);
tgz = open(argv[1], O_WRONLY | O_APPEND); assert(tgz != -1);
}
raw = write(tgz, z1k, sizeof(z1k)); assert(raw == sizeof(z1k));
close(tgz);
return 0;
}