我有一个来自用户输入的字符数组 ArrayA,其中可能包含转义序列。我想把 ArrayA 逐个字符复制到 ArrayB 中。那么,我该如何在 ArrayB 中表示 ArrayA 里的转义字符?如果只是简单地逐字符复制,打印 ArrayB 时 \t 并不会变成制表符,而是被当作反斜杠和字母 t 两个普通字符输出,对吗?我是否可以在 ArrayB 中写入 0x09 来表示 \t 转义序列,这样打印 ArrayB 时就会在数组中的相应位置真正输出一个制表符?
问问题
1718 次
1 回答
1
函数 cstrlit_chr()
这个怎么样:
/*
** Convert the C character literal at the start of the half-open range
** [str, end) (surrounding quotes excluded) to its character value.
**
** str  - first byte to examine.
** end  - one past the last byte that may be examined; the data does not
**        need to be NUL-terminated.
** eptr - if non-null, *eptr receives the position where scanning stopped.
**        It is written on failure as well as on success.
**
** Returns the converted value in the range 0x00..0xFF, or -1 if the
** character literal is invalid.
**
** Notes:
** - Does not support extension \E for ESC \033.
** - Does not support any extension for DEL \177.
** - Does not support control-char notation ^A for CTRL-A \001.
** - Accepts \z as valid z when z is not otherwise special.
** - Accepts \038 as valid CTRL-C \003; next character starts with the 8.
** - Accepts \x3Z as valid CTRL-C \003; next character starts with the Z.
** - Treats invalid octal escape \8 or \9 as 8 or 9.
*/

/*
** basedigit() is defined after this function in the listing; declare it
** here so the call sites below do not rely on an implicit declaration.
*/
int basedigit(char c, int base);

int cstrlit_chr(const char *str, const char *end, char const ** const eptr)
{
    unsigned char u;
    int rv;

    if (str >= end)
    {
        rv = -1;    /* String contains no data */
    }
    else if ((u = *str++) != '\\')
    {
        rv = u;     /* Ordinary character: stands for itself */
    }
    else if (str == end)
    {
        rv = -1;    /* Just a backslash - invalid */
    }
    else if ((u = *str++) == 'x')
    {
        /*
        ** Hex character constant - \xHH or \xH, where H is a hex digit.
        ** Technically, can be \xHHH too, if CHAR_BIT > 8; this nicety
        ** is being studiously ignored.
        */
        int x1;
        int x2;

        if (str == end)
        {
            rv = -1;    /* \x with nothing after it */
        }
        else if ((x1 = basedigit(*str++, 16)) < 0)
        {
            rv = -1;    /* Invalid hex constant */
            str--;      /* Leave the offending character unconsumed */
        }
        else if (str == end)
        {
            rv = x1;    /* Single-digit hex constant */
        }
        else if ((x2 = basedigit(*str++, 16)) < 0)
        {
            rv = x1;    /* Single-digit hex constant */
            str--;      /* Non-hex character belongs to the next literal */
        }
        else
        {
            rv = (x1 << 4) | x2;    /* Double-digit hex constant */
        }
    }
    else if (u >= '0' && u <= '9')
    {
        /*
        ** Octal character constant - \O or \OO or \OOO, where O is an
        ** octal digit. Technically, the constant extends for an
        ** indefinite number of octal digits; this nicety is being
        ** studiously ignored. Treat \8 as 8 and \9 as 9.
        **
        ** The digit test is a direct range comparison so that no
        ** <ctype.h> declaration is needed.
        */
        int o1;
        int o2;
        int o3;

        if ((o1 = basedigit(u, 8)) < 0)
        {
            rv = u;     /* Invalid octal constant (\8 or \9) */
        }
        else if (str == end)
        {
            rv = o1;    /* Single-digit octal constant */
        }
        else if ((o2 = basedigit(*str++, 8)) < 0)
        {
            rv = o1;    /* Single-digit octal constant */
            str--;      /* Non-octal character belongs to the next literal */
        }
        else if (str == end)
        {
            rv = (o1 << 3) | o2;    /* Double-digit octal constant */
        }
        else if ((o3 = basedigit(*str++, 8)) < 0)
        {
            rv = (o1 << 3) | o2;    /* Double-digit octal constant */
            str--;      /* Non-octal character belongs to the next literal */
        }
        else if (o1 >= 4)
        {
            rv = -1;    /* Out of range 0x00..0xFF (\000..\377) */
        }
        else
        {
            rv = (((o1 << 3) | o2) << 3) | o3;  /* Triple-digit octal constant */
        }
    }
    else
    {
        /* Presumably \a, \b, \f, \n, \r, \t, \v, \', \", \? or \\ - or an error */
        switch (u)
        {
        case 'a':
            rv = '\a';
            break;
        case 'b':
            rv = '\b';
            break;
        case 'f':
            rv = '\f';
            break;
        case 'n':
            rv = '\n';
            break;
        case 'r':
            rv = '\r';
            break;
        case 't':
            rv = '\t';
            break;
        case 'v':
            rv = '\v';
            break;
        case '\"':
            rv = '\"';
            break;
        case '\'':
            rv = '\'';
            break;
        case '\?':
            rv = '\?';
            break;
        case '\\':
            rv = '\\';
            break;
        case '\0':  /* Malformed: solitary backslash followed by NUL */
            rv = -1;
            break;
        default:
            rv = u; /* Nominally invalid: \X but X not special; return X. */
            break;
        }
    }

    /* Report where scanning stopped, whether or not the conversion succeeded. */
    if (eptr != 0)
    {
        *eptr = str;
    }
    return rv;
}
它可以处理 C89 的转义字符序列,但不处理 Unicode 通用字符名(\uXXXX 或 \U00XXXXXX)。
函数 basedigit()
/*
** Map a character to its digit value in the given base.
**
** Decimal digits map to 0..9; letters map to 10..35 regardless of case.
** Returns -1 when the base lies outside 2..36, when the character is not
** a digit or letter, or when its value is not below the base.
*/
int basedigit(char c, int base)
{
    int value = -1;     /* Stays -1 unless c is recognised below */

#if (('z' - 'a') != 25 || ('Z' - 'A') != 25)
#error Faulty Assumption
This code assumes the code set is ASCII, ISO 646, ISO 8859, or something similar.
#endif /* Alphabet test */

    if (base >= 2 && base <= 36)
    {
        if ('0' <= c && c <= '9')
        {
            value = c - '0';
        }
        else if ('A' <= c && c <= 'Z')
        {
            value = c - 'A' + 10;
        }
        else if ('a' <= c && c <= 'z')
        {
            value = c - 'a' + 10;
        }
    }

    /* A digit that is too large for the base is rejected as well. */
    return (value < base) ? value : -1;
}
示例使用
/* Sample usage */
#include <stdio.h>
#include <string.h>
/*
** Demonstration driver: decodes the sample string one character literal
** at a time and prints, for each one, the decoded value, the source text
** that was consumed, and the text that remains. The loop stops as soon as
** cstrlit_chr() returns -1.
*/
int main(void)
{
    /* Doubled backslashes in the source yield single backslashes in data. */
    const char data[] = "ab\\xFF\\03\\7\\377\\t\\?\\'\\\\yz";
    const char *limit = data + strlen(data);
    const char *cursor;
    const char *after;
    int value;

    for (cursor = data;
         (value = cstrlit_chr(cursor, limit, &after)) != -1;
         cursor = after)
    {
        char consumed[20];

        /* Bracket the exact bytes consumed by this conversion. */
        snprintf(consumed, sizeof(consumed), "[[%.*s]]", (int)(after - cursor), cursor);
        printf("%3d (0x%.2X) %-10s - [[%s]]\n", value, value & 0xFF, consumed, after);
    }
    return 0;
}
请注意,要扫描的范围由两个指针确定:一个指向起始字符,另一个指向范围末尾之后的那个字符(在示例中,它指向字符串末尾的 '\0',但该函数适用于任意数据,并不要求数据以空字符结尾)。
输入字符串在源代码中写的是双反斜杠,因此实际的字符串中包含的是单个反斜杠。
如果 c == -1,则转换失败。否则,c 中保存的是转换得到的字符,而 next(即通过 eptr 参数返回的指针)指向转换结束的位置。
于 2013-09-16T06:56:17.170 回答