我正在寻找一种在 IBM iSeries 上使用正则表达式替换(Regex Replace)函数的方法。
据我所知,我可以使用 C++ 库(regex.h)(source)。但有了这个,我只能匹配正则表达式,而不能替换。(使用 regcomp() 编译、regexec() 匹配正则表达式)
有谁知道怎么做?
确实,C/C++ POSIX 正则表达式库没有内置的正则表达式替换函数,但您可以利用 regexec() 返回的位置信息,配合 RPGLE 的内置函数 %replace() 来完成相同的事情。
(我假设您将使用 RPGLE,但您可以使用另一种语言。)
例如,如果您想屏蔽除电话号码的最后四位以外的所有数字,您可以这样做:
// Demo: mask the first two digit groups of every phone number found in
// a string, using the POSIX regex functions prototyped in the regex_h
// copy book together with the RPG built-in %replace().
/include qcpysrc,regex_h
d regex_phone_number...
d ds inz likeds(regex_t)
d dsrm ds inz likeds(regmatch_t) dim(20)
d data s 52a inz varying
d pattern s 256a inz varying
d rc s 10i 0 inz(0)
/FREE
*inlr = *on ;
data = 'My phone #''s are: (444) 555 - 6666 and 777.888.9999' ;
dsply data ;
// Three capture groups: area code, exchange, last four digits.
pattern = '\(?([0-9]{3})[ .)]*([0-9]{3})[ .-]*([0-9]{4})' ;
rc = regcomp(regex_phone_number :pattern :REG_EXTENDED) ;
if rc = 0 ;
  // Each pass masks one phone number. A masked number no longer
  // matches the pattern, so the loop ends once regexec() stops
  // finding matches (non-zero return).
  dow '1' ;
    // nmatch = re_nsub (3 for this pattern) fills dsrm(1) .. dsrm(3):
    // the whole match, group 1 and group 2. Group 3 is not needed.
    rc = regexec(regex_phone_number :data
                 :regex_phone_number.re_nsub :%addr(dsrm) :0) ;
    if rc <> 0 ;
      leave ;
    endif ;
    // rm_so is a 0-based offset, %replace() wants a 1-based position;
    // rm_eo - rm_so is the length of the matched group. The replacement
    // has the same length (3), so the offsets of group 2 stay valid
    // after group 1 has been replaced.
    data = %replace('***': data :dsrm(2).rm_so+1
                    :dsrm(2).rm_eo - dsrm(2).rm_so) ;
    data = %replace('***': data :dsrm(3).rm_so+1
                    :dsrm(3).rm_eo - dsrm(3).rm_so) ;
  enddo ;
  // Free the compiled expression only when regcomp() succeeded; after
  // a failed regcomp() the contents of the regex_t are not defined.
  regfree(regex_phone_number) ;
endif ;
dsply data ;
/END-FREE
这是拷贝簿(copy book)regex_h 的内容:
** Header file for calling the "Regular Expression" functions
** provided by the ILE C Runtime Library from an RPG IV
** program. Scott Klement, 2001-05-04
** Converted to qualified DS 2003-11-29
** Modified by Jarrett Gilliam 2014-11-05
**
** This copy book is for using the C regular expression library, regex.h, in RPG.
** You can go to http://www.regular-expressions.info/ to learn more about
** regular expressions. This regex flavor is POSIX ERE. You can go to
** http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_71/rtref/regexec.htm
** to learn more about how the C functions work.
d/if defined(REGEX_H)
d/eof
d/endif
d/define REGEX_H
**------------------------------------------------------------
* cflags for regcomp()
*
* The non-zero values are distinct bits, so several flags can be
* combined by adding them and passing the sum as the cflags
* parameter of regcomp(). REG_BASIC is 0 (no flag bits set).
**------------------------------------------------------------
d REG_BASIC c CONST(0)
d REG_EXTENDED c CONST(1)
d REG_ICASE c CONST(2)
d REG_NEWLINE c CONST(4)
d REG_NOSUB c CONST(8)
**------------------------------------------------------------
* eflags for regexec()
*
* Added together in the same way and passed as the eflags
* parameter of regexec(); pass 0 when no flag is wanted.
**------------------------------------------------------------
d REG_NOTBOL c CONST(256)
d REG_NOTEOL c CONST(512)
**------------------------------------------------------------
* errors returned
*
* Non-zero return values of regcomp() and regexec(); a return
* value of 0 means success. The value can be passed to
* regerror() as errcode to obtain a readable message.
**------------------------------------------------------------
* RE pattern not found
d REG_NOMATCH c CONST(1)
* Invalid Regular Expression
d REG_BADPAT c CONST(2)
* Invalid collating element
d REG_ECOLLATE c CONST(3)
* Invalid character class
d REG_ECTYPE c CONST(4)
* Last character is \
d REG_EESCAPE c CONST(5)
* Invalid number in \digit
d REG_ESUBREG c CONST(6)
* [ ] imbalance
d REG_EBRACK c CONST(7)
* \( \) or () imbalance
d REG_EPAREN c CONST(8)
* \{ \} or { } imbalance
d REG_EBRACE c CONST(9)
* Invalid \{ \} range exp
d REG_BADBR c CONST(10)
* Invalid range exp endpoint
d REG_ERANGE c CONST(11)
* Out of memory
d REG_ESPACE c CONST(12)
* ?*+ not preceded by valid RE
d REG_BADRPT c CONST(13)
* invalid multibyte character
d REG_ECHAR c CONST(14)
* ^ anchor and not BOL
* (^ is the shift-6 caret; it may display as the "not" sign)
d REG_EBOL c CONST(15)
* $ anchor and not EOL
d REG_EEOL c CONST(16)
* Unknown error in regcomp() call
d REG_ECOMP c CONST(17)
* Unknown error in regexec() call
d REG_EEXEC c CONST(18)
**------------------------------------------------------------
* Structure of a compiled regular expression:
*
* Declared BASED, so it serves as a layout template only;
* callers define their own storage with LIKEDS(regex_t) and
* pass that data structure to regcomp(), regexec(), regerror()
* and regfree().
* NOTE(review): the subfield layout presumably mirrors the C
* regex_t of the system's <regex.h> -- confirm against that
* header before changing any subfield.
**------------------------------------------------------------
* Dimension of the re_lsub / re_esub pointer arrays below
d REG_SUBEXP_MAX c 20
d regex_t ds qualified align based(template)
* NOTE(review): presumably the number of parenthesized
* subexpressions found by regcomp() -- confirm
d re_nsub 10i 0
d re_comp *
d re_cflags 10i 0
d re_erroff 10i 0
d re_len 10i 0
d re_ucoll 10i 0 dim(2)
d re_lsub * DIM(REG_SUBEXP_MAX)
d re_esub * DIM(REG_SUBEXP_MAX)
d re_map 256a
d re_shift 5i 0
d re_dbcs 5i 0
**------------------------------------------------------------
* structure used to report matches found by regexec()
*
* Declared BASED, so it serves as a layout template only;
* define the result array with LIKEDS(regmatch_t) DIM(n) and
* pass %ADDR() of that array as the pmatch parameter.
*
* rm_so is the start offset of a match and rm_eo its end
* offset; callers use them as
*     RPG start position = rm_so + 1
*     match length       = rm_eo - rm_so
* NOTE(review): rm_ss / rm_es look like shift-state companions
* of rm_so / rm_eo -- confirm against the system's <regex.h>.
**------------------------------------------------------------
d regmatch_t ds qualified align based(template)
d rm_so 10i 0
d rm_ss 5i 0
d rm_eo 10i 0
d rm_es 5i 0
**------------------------------------------------------------
* regcomp() -- Compile a Regular Expression ("RE")
*
* int regcomp(regex_t *preg, const char *pattern,
* int cflags);
*
* where:
* preg (output) = the compiled regular expression.
* pattern (input) = the RE to be compiled.
* cflags (input) = the sum of the cflag constants
* (listed above) for this RE.
*
* Returns 0 = success, otherwise an error number.
*
* RPG notes:
* - preg is passed by reference; pass a data structure
* defined with LIKEDS(regex_t).
* - pattern is declared OPTIONS(*STRING), so a character
* variable or literal can be passed directly.
**------------------------------------------------------------
d regcomp pr 10i 0 extproc('regcomp')
d preg like(regex_t)
d pattern * value options(*string)
d cflags 10i 0 value
**------------------------------------------------------------
* regexec() -- Execute a compiled Regular Expression ("RE")
*
* int regexec(const regex_t *preg, const char *string,
* size_t nmatch, regmatch_t *pmatch, int eflags);
*
* where:
* preg (input) = the compiled regular expression
* (the output of regcomp())
* string (input) = string to run the RE upon
* nmatch (input) = the number of matches to return.
* pmatch (output) = array of regmatch_t DS's
* showing what matches were found.
* eflags (input) = the sum of the flags (constants
* provided above) modifying the RE
*
* Returns 0 = success, otherwise an error number.
* (REG_NOMATCH = RE pattern not found)
*
* RPG notes:
* - string is declared OPTIONS(*STRING), so a character
* variable or literal can be passed directly.
* - pmatch is a pointer passed by value: pass %ADDR() of an
* array defined with LIKEDS(regmatch_t) DIM(n).
**------------------------------------------------------------
d regexec pr 10i 0 extproc('regexec')
d preg like(regex_t) const
d string * value options(*string)
d nmatch 10u 0 value
d pmatch * value
d eflags 10i 0 value
**------------------------------------------------------------
* regerror() -- return error information from regcomp/regexec
*
* size_t regerror(int errcode, const regex_t *preg,
* char *errbuf, size_t errbuf_size);
*
* where:
* errcode (input) = the error code to return info on
* (obtained as the return value from
* either regcomp() or regexec())
* preg (input) = the (compiled) RE to return the
* error for.
* errbuf (output) = buffer containing human-readable
* error message.
* errbuf_size (input) = size of errbuf (max length of msg
* that will be returned)
*
* returns: length of buffer needed to get entire error msg
*
* RPG notes:
* - errbuf is a pointer passed by value: pass %ADDR() of a
* character buffer.
* - NOTE(review): errbuf_size is declared 10i 0 (signed)
* although the C prototype above uses size_t and nmatch on
* regexec() is declared 10u 0 -- confirm whether this
* should be 10u 0 as well.
**------------------------------------------------------------
d regerror pr 10u 0 extproc('regerror')
d errcode 10i 0 value
d preg like(regex_t) const
d errbuf * value
d errbuf_size 10i 0 value
**------------------------------------------------------------
* regfree() -- free memory locked by Regular Expression
*
* void regfree(regex_t *preg);
*
* where:
* preg (input) = regular expression to free mem for.
*
* NOTE: regcomp() will always allocate extra memory
* to be pointed to by the various pointers in
* the regex_t structure. if you don't call this,
* that memory will never be returned to the system!
*
* RPG notes:
* - preg is passed by reference; pass the same
* LIKEDS(regex_t) data structure that was given to
* regcomp().
* - NOTE(review): presumably this should only be called
* after a successful regcomp() -- confirm against the
* regfree() documentation.
**------------------------------------------------------------
d regfree pr extproc('regfree')
d preg like(regex_t)
这是输出:
DSPLY My phone #'s are: (444) 555 - 6666 and 777.888.9999
DSPLY My phone #'s are: (***) *** - 6666 and ***.***.9999
可以通过提取替换逻辑并将其放入自己的过程中来改进代码,创建基于 POSIX 库的自定义正则表达式替换函数,但这不是绝对必要的。
我成功地借助 Java 使用了正则表达式。我受到了 Scott Klement 的这段代码和 IBM 的这段代码的启发,把两者结合起来效果很好。我只是添加了替换功能。
// Declarations for calling java.util.regex from RPG through JNI.
// The JNI copy member supplies the jstring type used by the LIKE
// definitions further down.
H
/include QSYSINC/QRPGLESRC,JNI
// java.lang.String constructor: builds a Java String object from an
// RPG character value.
D newString pr O CLASS(*JAVA:'java.lang.String')
D EXTPROC(*JAVA:'java.lang.String':
D *CONSTRUCTOR)
D bytearray 32767A VARYING CONST
// java.lang.String.getBytes(): returns the contents of a Java String
// as an RPG varying-length character value.
D getBytes PR 65535A VARYING
D EXTPROC(*JAVA:
D 'java.lang.String':
D 'getBytes')
// java.util.regex.Pattern.compile(String): static method that returns
// a Pattern object for the given pattern string.
D PatternCompile pr O CLASS(*JAVA:
D 'java.util.regex.Pattern')
D EXTPROC(*JAVA:
D 'java.util.regex.Pattern':
D 'compile') STATIC
D pattern O CLASS(*JAVA:'java.lang.String')
// java.util.regex.Pattern.matcher(CharSequence): instance method, so
// the Pattern object is passed as the first argument on the call.
D PatternMatcher pr O CLASS(*JAVA:
D 'java.util.regex.Matcher')
D EXTPROC(*JAVA:
D 'java.util.regex.Pattern':
D 'matcher')
D comparestr O CLASS(*JAVA
D :'java.lang.CharSequence')
// java.util.regex.Matcher.matches(): returns an indicator.
D CheckMatches pr 1N EXTPROC(*JAVA
D :'java.util.regex.Matcher'
D :'matches')
// java.util.regex.Matcher.replaceAll(String): returns a new Java
// String built with the given replacement string.
D DoReplace pr O CLASS(*JAVA:'java.lang.String')
D EXTPROC(*JAVA
D :'java.util.regex.Matcher'
D :'replaceAll')
D replacement O CLASS(*JAVA
D :'java.lang.String')
// Java object references: compiled pattern and its matcher.
D RegExPattern s O CLASS(*JAVA:
D 'java.util.regex.Pattern')
D RegExMatcher s O CLASS(*JAVA:
D 'java.util.regex.Matcher')
// Java String references: subject text, pattern, replacement string
// and the result of the replacement.
D jstrStmt s like(jstring)
D jPatStr s like(jstring)
D jRepStr s like(jstring)
D jRepStr2 s like(jstring)
// RPG copy of the replacement result, shown with DSPLY.
D result S 30A
/free
// Java String objects for the pattern and for the text to examine.
jPatStr = newString('^(\+33|0)([1-9][0-9]{8})$');
jstrStmt = newString('+33123456789');

// Compile the pattern and create a matcher for the text.
RegExPattern = PatternCompile(jPatStr);
RegExMatcher = PatternMatcher(RegExPattern : jstrStmt);

// Report the result of Matcher.matches().
if not CheckMatches(RegExMatcher);
  dsply ('it doesn''t match');
else;
  dsply ('it matches');
endif;

// Run Matcher.replaceAll() with the replacement string '0$2', copy the
// resulting Java String into an RPG field and display it.
jRepStr = newString('0$2');
jRepStr2 = DoReplace(RegExMatcher : jRepStr);
result = getBytes(jRepStr2);
dsply (%subst(result : 1 : 30));

*inlr = *on;
/end-free
它可以工作,但使用 Java。我仍在研究 WarrenT 建议的 PASE 解决方案,但是在 ILE 程序中使用 PASE 实在是太痛苦了……
Young i Professionals Wiki 有一个 Open Source Binaries 页面,其中列出了 PCRE 库(Perl 兼容的正则表达式)。
请告诉我们进展如何。我可能也会自己试试 ;-)
有关优秀的 SQLRPGLE 示例和说明,请参阅:
https://www.rpgpgm.com/2017/10/replacing-parts-of-strings-using-regexp.html
REGEXP_REPLACE(源字符串, 模式表达式, 替换字符串, 起始位置, 第几次出现, 标志)