2

是否有任何用 C 编写的有效(对于大于 5MB 的文本文件)引用的可打印解码器?我在 iOS 项目中需要这样的解码器。

与此同时,我正在使用一个太慢的高级解码器。在设备上解码 5MB 文件最多需要 10 分钟:

- (NSString *)decodedQuotedPrintable:(NSString *)string
{
    NSMutableString *decodedString = string.mutableCopy;

    [decodedString replaceOccurrencesOfString:@"=\r\n" withString:@"" options:NSCaseInsensitiveSearch range:NSMakeRange(0, decodedString.length)];

    NSInteger idx = 0;
    _WHILE(idx != NSNotFound)
    {
        idx = [decodedString rangeOfString:@"="
                                   options:NSCaseInsensitiveSearch
                                     range:NSMakeRange(idx + 1, decodedString.length - idx - 1)].location;

        _IF(idx + 5> decodedString.length)
        {
            break;
        }
        unsigned int hex = 0;
        NSScanner *scanner = [NSScanner scannerWithString:[decodedString substringWithRange:
                                                           NSMakeRange(idx+1, 2)]];

        [scanner scanHexInt:&hex];

        [decodedString replaceCharactersInRange:NSMakeRange(idx, 3)
                                     withString:[NSString stringWithFormat:@"%c", hex]];

    }

    return decodedString;
}
4

2 回答 2

2

我现在使用 C 级字符串操作得到了它。它就像一个魅力。这将我在 iPad2 上处理测试文件的时间从 6 分钟缩短到 3 秒:

- (char *)replace1:(char const * const)original
{
    char const * const pattern = "=\r\n";

    size_t const patlen = strlen(pattern);
    size_t const orilen = strlen(original);

    size_t patcnt = 0;
    const char * oriptr;
    const char * patloc;

    // find how many times the pattern occurs in the original string
    _FOR(oriptr = original;
         (patloc = strstr(oriptr, pattern));
         oriptr = patloc + patlen)
    {
        patcnt++;
    }

    {
        // allocate memory for the new string
        size_t const retlen = orilen - patcnt * patlen;
        char * const returned = (char *) malloc( sizeof(char) * (retlen + 1) );

        _IF(returned != NULL)
        {
            // copy the original string,
            // replacing all the instances of the pattern
            char * retptr = returned;
            _FOR(oriptr = original;
                 (patloc = strstr(oriptr, pattern));
                 oriptr = patloc + patlen)
            {
                size_t const skplen = patloc - oriptr;
                // copy the section until the occurence of the pattern
                strncpy(retptr, oriptr, skplen);
                retptr += skplen;
            }
            // copy the rest of the string.
            strcpy(retptr, oriptr);
        }
        return returned;
    }
}

- (char *)replace2:(char const * const)original
{
    size_t const replen = 1;
    size_t const patlen = 3;
    size_t const orilen = strlen(original);

    size_t patcnt = 0;
    const char * oriptr;
    const char * patloc;

    // find how many times the pattern occurs in the original string
    _FOR(oriptr = original; (patloc = strstr(oriptr, "=")); oriptr = patloc + patlen)
    {
        patcnt++;
    }

    {
        // allocate memory for the new string
        size_t const retlen = orilen + patcnt * (replen - patlen);
        char * const returned = (char *) malloc( sizeof(char) * (retlen + 1) );

        _IF(returned != NULL)
        {
            // copy the original string,
            // replacing all the instances of the pattern
            char * retptr = returned;
            _FOR(oriptr = original;
                 (patloc = strstr(oriptr, "="));
                 oriptr = patloc + patlen)
            {
                char newRep[3];

                newRep[0] = patloc[1];
                newRep[1] = patloc[2];
                newRep[2] = '\0';

                char _rep[2];
                _rep[0] = (char)(int)strtol(newRep, NULL, 16);
                _rep[1] = '\0';

                size_t const skplen = patloc - oriptr;
                // copy the section until the occurence of the pattern
                strncpy(retptr, oriptr, skplen);
                retptr += skplen;
                // copy the replacement
                strncpy(retptr, _rep, replen);
                retptr += replen;
            }
            // copy the rest of the string.
            strcpy(retptr, oriptr);
        }
        return returned;
    }
}

- (NSString *)decodedQuotedPrintable:(NSString *)string
{
    char *temp = [self replace1:(char *)[string UTF8String]];

    temp = [self replace2:temp];

    return [NSString stringWithCString:temp
                              encoding:NSUTF8StringEncoding];
}
于 2013-06-26T11:35:43.543 回答
1

尽量不要在可变字符串中进行所有这些操作(这将成为字符串的病态缩短,一次一个字符)。

使用新字符串(或缓冲区),解析原始字符串,写入新字符串。

于 2013-06-25T14:26:01.157 回答