1
#ifndef _CTYPE_H_
#define _CTYPE_H_

#include "_ansi.h"

_BEGIN_STD_C

/* Character classification and case-conversion entry points.  Each one
   takes an int and returns an int.  The declarations are written through
   _EXFUN, which comes from "_ansi.h" and is not visible in this file.
   When __cplusplus is not defined, most of these names are additionally
   provided as macros further down in this header.  */
int _EXFUN(isalnum, (int __c));
int _EXFUN(isalpha, (int __c));
int _EXFUN(iscntrl, (int __c));
int _EXFUN(isdigit, (int __c));
int _EXFUN(isgraph, (int __c));
int _EXFUN(islower, (int __c));
int _EXFUN(isprint, (int __c));
int _EXFUN(ispunct, (int __c));
int _EXFUN(isspace, (int __c));
int _EXFUN(isupper, (int __c));
int _EXFUN(isxdigit,(int __c));
int _EXFUN(tolower, (int __c));
int _EXFUN(toupper, (int __c));

/* isblank is declared only outside strict-ANSI mode, when compiling as
   C++, or when __STDC_VERSION__ reports 199901L (C99) or later.  */
#if !defined(__STRICT_ANSI__) || defined(__cplusplus) || __STDC_VERSION__ >= 199901L
int _EXFUN(isblank, (int __c));
#endif

/* Extensions that are hidden whenever __STRICT_ANSI__ is defined.  */
#ifndef __STRICT_ANSI__
int _EXFUN(isascii, (int __c));
int _EXFUN(toascii, (int __c));
/* Unchecked case conversions: the argument is narrowed to unsigned char
   and the distance between 'A' and 'a' is applied unconditionally, with
   no test of the argument's current case.
   NOTE(review): presumably callers are expected to pass only a letter of
   the opposite case -- confirm the intended contract.  */
#define _tolower(__c) ((unsigned char)(__c) - 'A' + 'a')
#define _toupper(__c) ((unsigned char)(__c) - 'a' + 'A')
#endif

/* Bit flags carried by each entry of the classification table.  The
   meanings noted here follow from how the is*() macros in this header
   test the bits.  */
#define _U  0x01    /* upper-case letter */
#define _L  0x02    /* lower-case letter */
#define _N  0x04    /* decimal digit */
#define _S  0x08    /* white space */
#define _P  0x10    /* punctuation */
#define _C  0x20    /* control character */
#define _X  0x40    /* hexadecimal letter; isxdigit tests _X|_N */
#define _B  0x80    /* blank; tested by isblank and isprint */

/* Pointer to the classification table that the is*() macros index.
   The _CONST qualifier is applied only when _MB_CAPABLE is not defined.
   NOTE(review): _CONST and __IMPORT are defined outside this file;
   presumably _CONST expands to const -- confirm in "_ansi.h".  */
#ifndef _MB_CAPABLE
_CONST
#endif
extern  __IMPORT char   *__ctype_ptr__;

#ifndef __cplusplus
/* __ctype_lookup(__c) yields the flag byte for __c from the table that
   __ctype_ptr__ points to.  The pointer is advanced by one before it is
   indexed, so table slot 0 answers for EOF and slot __c + 1 answers for
   character __c.

   These macros are intentionally written in a manner that will trigger
   a gcc -Wall warning if the user mistakenly passes a 'char' instead
   of an int containing an 'unsigned char'.  Note that the sizeof will
   always be 1, which is what we want for mapping EOF to __ctype_ptr__[0];
   the use of a raw index inside the sizeof triggers the gcc warning if
   __c was of type char, and sizeof masks side effects of the extra __c.
   Meanwhile, the real index to __ctype_ptr__+1 must be cast to int,
   since isalpha(0x100000001LL) must equal isalpha(1), rather than being
   an out-of-bounds reference on a 64-bit machine.  */
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])

/* Classification macros.  Each one masks the flag byte produced by
   __ctype_lookup.  Apart from isupper and islower, which compare the
   masked value, the result is the masked bits themselves: non-zero means
   "yes", and the value is not normalised to 1.  */

/* Letters: either case bit, exactly the upper bit, exactly the lower bit.  */
#define isalpha(__c)    (__ctype_lookup(__c) & (_U | _L))
#define isupper(__c)    ((__ctype_lookup(__c) & (_U | _L)) == _U)
#define islower(__c)    ((__ctype_lookup(__c) & (_U | _L)) == _L)

/* Digits, and letters or digits.  */
#define isdigit(__c)    (__ctype_lookup(__c) & _N)
#define isxdigit(__c)   (__ctype_lookup(__c) & (_X | _N))
#define isalnum(__c)    (__ctype_lookup(__c) & (_U | _L | _N))

/* Space, punctuation and control characters.  */
#define isspace(__c)    (__ctype_lookup(__c) & _S)
#define ispunct(__c)    (__ctype_lookup(__c) & _P)
#define iscntrl(__c)    (__ctype_lookup(__c) & _C)

/* isprint tests the same bits as isgraph plus the blank bit.  */
#define isgraph(__c)    (__ctype_lookup(__c) & (_P | _U | _L | _N))
#define isprint(__c)    (__ctype_lookup(__c) & (_P | _U | _L | _N | _B))

/* Macro form of isblank.  It is defined only when __GNUC__ is defined
   (the body uses a statement expression and __typeof__) and only outside
   strict-ANSI mode or for C99 and later.  The argument is copied into __x
   so that the two tests below share one evaluation of it.  The result is
   true when the table entry for __x carries _B, or when __x equals '\t'.  */
#if defined(__GNUC__) && \
    (!defined(__STRICT_ANSI__) || __STDC_VERSION__ >= 199901L)
#define isblank(__c) \
  __extension__ ({ __typeof__ (__c) __x = (__c);        \
        (__ctype_lookup(__x)&_B) || (int) (__x) == '\t';})
#endif


/* Non-gcc versions will get the library versions, and will be
   slightly slower.  These macros are not NLS-aware so they are
   disabled if the system supports the extended character sets. */
# if defined(__GNUC__)
#  if !defined (_MB_EXTENDED_CHARSETS_ISO) && !defined (_MB_EXTENDED_CHARSETS_WINDOWS)
/* Inline versions: the argument is captured once in __x, classified with
   islower/isupper, and shifted by the distance between 'a' and 'A' when
   the test succeeds; otherwise it is returned unchanged as an int.  */
#   define toupper(__c) \
  __extension__ ({ __typeof__ (__c) __x = (__c);    \
      islower (__x) ? (int) __x - 'a' + 'A' : (int) __x;})
#   define tolower(__c) \
  __extension__ ({ __typeof__ (__c) __x = (__c);    \
      isupper (__x) ? (int) __x - 'A' + 'a' : (int) __x;})
#  else /* _MB_EXTENDED_CHARSETS* */
/* Allow a gcc warning if the user passed 'char', but defer to the
   function.  */
/* The discarded __ctype_ptr__[__x] expression exists only to present a
   raw array index to the compiler.  Writing the name as (toupper) or
   (tolower) keeps the function-like macro from expanding, so the call
   reaches the library function.  */
#   define toupper(__c) \
  __extension__ ({ __typeof__ (__c) __x = (__c);    \
      (void) __ctype_ptr__[__x]; (toupper) (__x);})
#   define tolower(__c) \
  __extension__ ({ __typeof__ (__c) __x = (__c);    \
      (void) __ctype_ptr__[__x]; (tolower) (__x);})
#  endif /* _MB_EXTENDED_CHARSETS* */
# endif /* __GNUC__ */
#endif /* !__cplusplus */

#ifndef __STRICT_ANSI__
/* isascii: true when the argument, converted to unsigned, is at most 0x7f.
   toascii: keeps only the low seven bits of the argument.  */
#define isascii(__c)    (((unsigned) (__c)) <= 0x7f)
#define toascii(__c)    ((__c) & 0x7f)
#endif

/* For C++ backward-compatibility only.  Declares the _ctype_ array
   itself, with no bound given here.
   NOTE(review): _CONST and __IMPORT come from outside this file;
   presumably the array is read-only -- confirm against its definition.  */
extern  __IMPORT _CONST char    _ctype_[];

_END_STD_C

#endif /* _CTYPE_H_ */

此代码来自标准库 ctype.h。

下面的代码给我留下了深刻的印象:

#define isalpha(__c)    (__ctype_lookup(__c)&(_U|_L))

但这是怎么做到的?仅凭这一个操作,如何判断一个字符是不是字母?我想我得先弄清楚 __ctype_lookup 是什么。

然而, __ctype_lookup 更奇怪。

#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])

幸好代码正上方有注释,可惜看不懂。

/* 这些宏有意写成这种形式:如果用户误传了一个 'char',而不是一个存放着 'unsigned char' 值的 int,就会触发 gcc -Wall 警告。请注意,sizeof 的结果恒为 1,这正是把 EOF 映射到 __ctype_ptr__[0] 所需要的;在 sizeof 里直接使用原始下标,会在 __c 为 char 类型时触发 gcc 警告,同时 sizeof 又屏蔽了多出来的那个 __c 的副作用。另一方面,对 __ctype_ptr__+1 的真正下标必须强制转换为 int,因为 isalpha(0x100000001LL) 必须等于 isalpha(1),而不应在 64 位机器上变成越界访问。*/

来人帮帮我!

下面这行代码中的 __IMPORT 是在别处定义的。如果它很重要,我会去把它的定义找出来。

extern  __IMPORT char   *__ctype_ptr__;
4

1 回答 1

2

参考 Chromium 中的 ctype.c 实现(希望它具有代表性):

__ctype_ptr__是指向_ctype_数组开头的指针:

/* Start out pointing at the first element of _ctype_.  The cast converts
   the array, which is declared with _CONST, to the plain char * type of
   this pointer.  */
char *__ctype_ptr__ = (char *) _ctype_;

_ctype_数组是一个 const 字符数组:

/* Classification table, sized for one leading entry plus 256 more.
   The leading 0 is the slot that __ctype_lookup reaches for EOF (the
   header maps EOF to __ctype_ptr__[0]), so EOF carries no flag bits.
   The two macros that follow supply the per-character flag bytes.  */
_CONST char _ctype_[1 + 256] = {
    0,
    _CTYPE_DATA_0_127,
    _CTYPE_DATA_128_255
};

最后,_CTYPE_DATA_0_127 和 _CTYPE_DATA_128_255 这两项实际上是宏定义,各自展开为多个 char,其中每个 char 都是一个位域:

/* Flag bytes for character codes 0 through 127, listed in code order,
   eight per row (16 rows).  The horizontal tab (code 9) carries _C|_S
   but not _B; the space (code 32) is the only entry here with _B, which
   is consistent with the isblank macro testing '\t' explicitly.  The
   letters A-F and a-f carry _X in addition to their case bit.  */
#define _CTYPE_DATA_0_127 \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C|_S, _C|_S, _C|_S,    _C|_S,  _C|_S,  _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_C, _C, _C, _C, _C, _C, _C, _C, \
_S|_B,  _P, _P, _P, _P, _P, _P, _P, \
_P, _P, _P, _P, _P, _P, _P, _P, \
_N, _N, _N, _N, _N, _N, _N, _N, \
_N, _N, _P, _P, _P, _P, _P, _P, \
_P, _U|_X,  _U|_X,  _U|_X,  _U|_X,  _U|_X,  _U|_X,  _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _U, _U, _U, _U, _U, \
_U, _U, _U, _P, _P, _P, _P, _P, \
_P, _L|_X,  _L|_X,  _L|_X,  _L|_X,  _L|_X,  _L|_X,  _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _L, _L, _L, _L, _L, \
_L, _L, _L, _P, _P, _P, _P, _C

其中 _U、_P 等每一个宏都代表一个不同的位:

#define _U  01
#define _L  02
#define _N  04
#define _S  010
#define _P  020
#define _C  040
#define _X  0100
#define _B  0200

所以,这条线:

#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])

只是对该 _ctype_ 数组的一次查找(见文末脚注 1),然后

#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))

则是用相应的位掩码(本例中为大写位与小写位的按位或)对查找结果做按位与。


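脚注 1:老实说,我不确定 sizeof(""[__c]) 的具体作用是什么,我猜与支持不同的编译器/架构之类的事情有关。它的效果是跳过 _ctype_ 数组中的第一个“0”。(按头文件里那段注释的说法,sizeof 的结果恒为 1,因此下标 0 这一项留给了 EOF;而在 sizeof 里直接用 __c 作下标,是为了在传入 char 时让 gcc 给出警告。)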

于 2018-08-03T12:25:19.813 回答