1

菜鸟警告。

我正在尝试创建一个压缩程序。它将带有 ASCII 字符的 .txt 作为参数,并切断每个字符的二进制表示的前导 0。

它通过使用两个不同整数的最后 2 个字节来做到这一点。带前导零的字符被放入整数“writer”的第 4 个字节,下一个字符被放入整数“temp”的第 3 个字节。然后将“temp”向右移动一位,再与“writer”进行或运算,这样前导零的位置就被我们需要的数据填充了。如此重复,移位计数器在每个字符之后加一。第一种情况有点特殊。如果写在纸上,这个算法并不复杂。

我觉得我什么都试过了。我已经研究过算法很多次了。我很确定问题出在 shift_counter 达到 8 .. 但它应该可以正常工作。它只是没有。我可以在这里告诉你为什么(代码在下面):

这是我的输出的十六进制转储:

0000000    3f  00  00  00  41  10  68  9e  6e  c3  d9  65  10  88  5e  c6
0000020    d3  41  e6  74  9a  5d  06  d1  df  a0  7a  7d  5e  06  a5  dd
0000040    20  3a  bd  3c  a7  a7  dd  67  10  e8  5d  a7  83  e8  e8  72
0000060    19  a4  c7  c9  6e  a0  f1  f8  dd  86  cb  cb  f3  f9  3c    
0000077

和正确的输出:

0000000    3f  00  00  00  41  d0  3c  dd  86  b3  cb  20  7a  19  4f  07
0000020    99  d3  ec  32  88  fe  06  d5  e7  65  50  da  0d  a2  97  e7
0000040    f4  b4  fb  0c  7a  d7  e9  20  3a  ba  0c  d2  e3  64  37  d0
0000060    f8  dd  86  cb  cb  f3  79  fa  ed  76  29  00  0a  0a        
0000076

代码:

/*
 * Attempt at packing a text file into "output.txt" by dropping the leading
 * zero bit of every character.
 *
 * filename_ptr: path of the input file.
 * Returns 0 unconditionally; no result of open/lseek/read/write is checked.
 *
 * Output layout as written by this code: the input size (4 bytes copied from
 * the unsigned int file_bytes), followed by exactly one byte per loop
 * iteration, i.e. one payload byte per input byte.
 *
 * NOTE(review): neither the second in_fd nor out_fd is closed before return.
 */
int compress(char *filename_ptr){

    int in_fd;
    in_fd = open(filename_ptr, O_RDONLY);

    //seek to the end of the file to learn its size, then reset the read
    //position by closing and reopening the file
    unsigned int file_bytes = lseek(in_fd, 0, SEEK_END);
    close(in_fd);
    in_fd = open(filename_ptr, O_RDONLY);

    //store file contents in a variable-length array sized to the file
    unsigned char read_buffer[file_bytes];
    read(in_fd, read_buffer, file_bytes);

    //file where the output will be stored
    //NOTE(review): the descriptor returned by creat() is discarded; the file
    //is then opened a second time to obtain out_fd
    int out_fd;
    creat("output.txt", 0644);
    out_fd = open("output.txt", O_WRONLY);

    //sets file size in header (needed for decompression, this is the size of the
    //file before compression). everything written after this 4-byte int
    //is a 1 byte char
    write(out_fd, &file_bytes, 4);

    //writer: accumulator whose low byte is emitted on every iteration
    //temp:   next character, positioned in bits 8-15 and then shifted right
    unsigned int writer;
    unsigned int temp;
    unsigned char out_char;

    int i;
    //starting at 8 forces the "restart" branch on the first iteration
    int shift_count = 8;
    for(i = 0; i < file_bytes; i++){


      if(shift_count == 8){
          //restart a group: writer is overwritten with the current character,
          //dropping whatever bits it still carried from the previous iteration
          writer = read_buffer[i];
          //NOTE(review): on the first iteration temp is read here before it
          //has been assigned; the result is overwritten on the next line
          temp = temp & 0x00000000;
          //NOTE(review): i is not advanced past this character, so the next
          //iteration loads read_buffer[i+1] again through the else branch;
          //when i is the last index this reads one past the end of read_buffer
          temp = read_buffer[i+1] << 8;
          shift_count = 1;
      }else{

        //moves the next char's bits to the left, for the purpose of filling the
        //8 bit buffer (writer) via OR operation
        temp = read_buffer[i] << 8;
      }
      //slide the character down so its low bits overlap the top of writer's
      //low byte
      temp = temp >> shift_count;
      writer = writer | temp;

      //take the right (low) byte of writer
      unsigned int right_byte = writer & 0x000000ff;

      //narrow right_byte to a single char for output
      out_char = (char) right_byte;

      //one byte is written on every iteration, whatever shift_count is
      write(out_fd, &out_char, 1);

      //keep only bits 8-15 of writer
      writer = writer & 0x0000ff00;

      //move those bits down into the low byte for the next iteration
      writer = writer >> 8;        

      shift_count++;

    }

    return 0;
}
4

1 回答 1

0

在我看来,输入和输出耦合太强了。

在某个时刻,程序应该从输入中读取(大约)第 80 个八位字节,同时向输出写入(大约)第 70 个八位字节,因为您希望(平均而言)每读取 8 位就写入 7 位,对吗?

而下面这个循环

for(i = 0; i < file_bytes; i++){
    ...
    ... = read_buffer[i];
    ...
    write(out_fd, &out_char, 1);
    ...
}

实际上似乎正在做的是:在第 70 次通过循环时——当 70==i 时——它从输入读取第 70 个八位字节并将第 70 个八位字节写入输出。在第 80 次通过循环时——当 80==i 时——它从输入读取第 80 个八位字节并将第 80 个八位字节写入输出。

您必须决定:您希望“i”代表处理的输入字符数,还是处理的输出字符数?因为不可能两者都做——不可能有 70 等于 80。

也许这样的事情更接近你想要的:

/* test.c
http://stackoverflow.com/questions/15080239/c-how-to-fix-this-algorithm-for-z827-ascii-compression
WARNING: untested code.
*/

/*
 * Pack a 7-bit ASCII text file into "output.txt" (z827-style).
 *
 * Output layout:
 *   - header: the original file size, stored as a native unsigned int
 *     (4 bytes on the usual platforms), needed for decompression;
 *   - payload: the low 7 bits of every input byte, concatenated
 *     least-significant-bit first and emitted 8 bits at a time. The last
 *     payload byte is zero-padded in its high bits.
 *
 * filename_ptr: path of the input file.
 * Returns 0 on success, -1 if the input cannot be opened, sized or read in
 * full, or if the output cannot be created, written or closed.
 *
 * The whole input is held in a stack-allocated variable-length array, so
 * this is only suitable for files that comfortably fit on the stack.
 */
int compress(char *filename_ptr){

    int in_fd = open(filename_ptr, O_RDONLY);
    if(in_fd < 0){
        return -1;
    }

    //find the file size by seeking to the end, then rewind in place
    //(no need to close and reopen the file)
    off_t end = lseek(in_fd, 0, SEEK_END);
    if(end < 0 || (off_t)(unsigned int)end != end
               || lseek(in_fd, 0, SEEK_SET) < 0){
        //seek failed, or the size does not fit the unsigned int header
        close(in_fd);
        return -1;
    }
    unsigned int file_bytes = (unsigned int)end;

    //store file contents in buffer; a zero-length VLA is undefined
    //behaviour, so reserve at least one byte for an empty file
    unsigned char read_buffer[file_bytes ? file_bytes : 1];
    unsigned int have = 0;
    while(have < file_bytes){
        //read() may legitimately return fewer bytes than requested
        ssize_t got = read(in_fd, read_buffer + have, file_bytes - have);
        if(got <= 0){
            close(in_fd);
            return -1;
        }
        have += (unsigned int)got;
    }
    close(in_fd);

    //file where the output will be stored: create or truncate it and open
    //it for writing in a single call, so no descriptor is leaked
    int out_fd = open("output.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if(out_fd < 0){
        return -1;
    }

    int status = 0;

    //sets file size in header (needed for decompression, this is the size
    //of the file before compression); everything after it is packed data
    if(write(out_fd, &file_bytes, sizeof file_bytes)
            != (ssize_t)sizeof file_bytes){
        status = -1;
    }

    unsigned int writer = 0;  // bit accumulator, must start empty
    int writer_bits = 0;      // number of valid bits currently in writer
    unsigned char out_char;

    unsigned int i;
    for(i = 0; status == 0 && i < file_bytes; i++){
        // i is the number of (7 bit ASCII) characters
        // consumed from the input so far.

        //append 7 more bits above the "writer_bits" good bits already in
        //the accumulator; masking keeps a stray high bit from leaking
        //into the next character's slot
        writer |= (unsigned int)(read_buffer[i] & 0x7f) << writer_bits;
        writer_bits += 7;

        // output 8 bits of data whenever
        // we have *at least* 8 bits of data in the writer buffer.
        if(writer_bits >= 8){
            out_char = (unsigned char)(writer & 0x000000ff);
            if(write(out_fd, &out_char, 1) != 1){
                status = -1;
            }

            //drop the byte just written
            writer >>= 8;
            writer_bits -= 8;
        }
    }

    // flush the leftover bits still in writer (fewer than 8); they must be
    // taken from writer itself, not from the byte emitted by the last
    // loop iteration
    if(status == 0 && writer_bits > 0){
        out_char = (unsigned char)(writer & 0x000000ff);
        if(write(out_fd, &out_char, 1) != 1){
            status = -1;
        }
    }

    if(close(out_fd) < 0){
        status = -1;
    }
    return status;
}

(目前程序将整个输入文件读入RAM,然后写入整个输出文件。有些程序员喜欢一次读一点,然后一次写一点。两种方法都有优点和缺点)。

于 2013-08-20T00:11:37.293 回答