c++ - IBMi 上的正则表达式替换

Question

我正在寻找一种在IBM iseries上使用Regex Replace函数的方法。

据我所知，我可以使用 C++ 库（regex.h）（source）。但有了这个，我只能匹配正则表达式，而不能替换（使用 regcomp() 编译、使用 regexec() 匹配正则表达式）。

有谁知道怎么做？

score 5 · Accepted Answer

确实，C/C++ POSIX 正则表达式库没有内置的正则表达式替换函数，但您可以使用来自 regexec() 函数的位置信息和 RPGLE 内置函数 %replace() 来完成相同的事情。（我假设您将使用 RPGLE，但您也可以使用另一种语言。）

例如，如果您想屏蔽除电话号码的最后四位以外的所有数字，您可以这样做：

  /include qcpysrc,regex_h

  * Compiled form of the phone-number pattern; filled in by regcomp().
 d regex_phone_number...
 d                 ds                  inz likeds(regex_t)
  * Match offsets written by regexec(). This program reads dsrm(2)
  * and dsrm(3) for the first two parenthesised groups.
 d dsrm            ds                  inz likeds(regmatch_t) dim(20)

 d data            s             52a   inz varying
 d pattern         s            256a   inz varying
 d rc              s             10i 0 inz(0)

  /FREE
   *inlr = *on ;
   data = 'My phone #''s are: (444) 555 - 6666 and 777.888.9999' ;

   dsply data ;

   // Three groups: two runs of three digits and a final run of four.
   pattern = '\(?([0-9]{3})[ .)]*([0-9]{3})[ .-]*([0-9]{4})' ;
   rc = regcomp(regex_phone_number :pattern :REG_EXTENDED) ;
   if rc = 0 ;
     // Each pass masks one phone number. A masked number no longer
     // matches the pattern, so the next regexec() finds the next
     // number and the loop ends when regexec() reports no match.
     dow '1' ;
       rc = regexec(regex_phone_number :data
              :regex_phone_number.re_nsub  :%addr(dsrm) :0) ;

       if rc <> 0 ;
         leave ;
       endif ;

       // rm_so is used as a zero-based offset; %replace() wants a
       // one-based position, hence the +1. Both groups are three
       // digits long, so replacing them with '***' does not move
       // the offsets of the second group.
       data = %replace('***': data :dsrm(2).rm_so+1
                :dsrm(2).rm_eo - dsrm(2).rm_so) ;
       data = %replace('***': data :dsrm(3).rm_so+1
                :dsrm(3).rm_eo - dsrm(3).rm_so) ;
     enddo ;

     // Free only an expression that regcomp() compiled successfully;
     // calling regfree() after a failed compile is undefined.
     regfree(regex_phone_number) ;
   else ;
     // Do not fail silently when the pattern cannot be compiled.
     dsply ('regcomp() failed, rc=' + %char(rc)) ;
   endif ;

   dsply data ;
  /END-FREE

这是复制成员（copybook）regex_h 的样子：

  ** Header file for calling the "Regular Expression" functions
  **   provided by the ILE C Runtime Library from an RPG IV
  **   program.                 Scott Klement, 2001-05-04
  **                       Converted to qualified DS 2003-11-29
  **                       Modified by Jarrett Gilliam 2014-11-05
  **
  ** This copy book is for using the C regular expression library, regex.h, in RPG.
  ** You can go to http://www.regular-expressions.info/ to learn more about
  ** regular expressions. This regex flavor is POSIX ERE. You can go to
  ** http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_71/rtref/regexec.htm
  ** to learn more about how the C functions work.

 d/if defined(REGEX_H)
 d/eof
 d/endif
 d/define REGEX_H

  **------------------------------------------------------------
  * cflags for regcomp()
  *   Passed as the cflags parameter of regcomp(). To request
  *   several options at once, add the constants together.
  **------------------------------------------------------------
 d REG_BASIC       c                   CONST(0)
 d REG_EXTENDED    c                   CONST(1)
 d REG_ICASE       c                   CONST(2)
 d REG_NEWLINE     c                   CONST(4)
 d REG_NOSUB       c                   CONST(8)

  **------------------------------------------------------------
  * eflags for regexec()
  *   Passed as the eflags parameter of regexec(). To request
  *   both, add the constants together; pass 0 for neither.
  **------------------------------------------------------------
 d REG_NOTBOL      c                   CONST(256)
 d REG_NOTEOL      c                   CONST(512)

  **------------------------------------------------------------
  *  errors returned
  *    Non-zero return values of regcomp() and regexec().
  *    Pass one to regerror() to get a readable message.
  **------------------------------------------------------------
  * RE pattern not found
 d REG_NOMATCH     c                   CONST(1)
  * Invalid Regular Expression
 d REG_BADPAT      c                   CONST(2)
  * Invalid collating element
 d REG_ECOLLATE    c                   CONST(3)
  * Invalid character class
 d REG_ECTYPE      c                   CONST(4)
  * Last character is \
 d REG_EESCAPE     c                   CONST(5)
  * Invalid number in \digit
 d REG_ESUBREG     c                   CONST(6)
  * [] imbalance
 d REG_EBRACK      c                   CONST(7)
  * \( \) or () imbalance
 d REG_EPAREN      c                   CONST(8)
  * \{ \} or { } imbalance
 d REG_EBRACE      c                   CONST(9)
  * Invalid \{ \} range exp
 d REG_BADBR       c                   CONST(10)
  * Invalid range exp endpoint
 d REG_ERANGE      c                   CONST(11)
  * Out of memory
 d REG_ESPACE      c                   CONST(12)
  * ?*+ not preceded by valid RE
 d REG_BADRPT      c                   CONST(13)
  * invalid multibyte character
 d REG_ECHAR       c                   CONST(14)
  * (shift 6 caret or not) anchor and not BOL
 d REG_EBOL        c                   CONST(15)
  * $ anchor and not EOL
 d REG_EEOL        c                   CONST(16)
  * Unknown error in regcomp() call
 d REG_ECOMP       c                   CONST(17)
  * Unknown error in regexec() call
 d REG_EEXEC       c                   CONST(18)


  **------------------------------------------------------------
  *  Structure of a compiled regular expression:
  *
  *  Declared BASED(template), so it is a layout to be copied
  *  with LIKEDS(regex_t) / LIKE(regex_t), not a variable with
  *  storage of its own.  regcomp() fills it in; re_nsub is the
  *  field the calling program reads back.
  *  REG_SUBEXP_MAX sizes the re_lsub and re_esub arrays.
  **------------------------------------------------------------
 d REG_SUBEXP_MAX  c                   20
 d regex_t         ds                  qualified align based(template)
 d   re_nsub                     10i 0
 d   re_comp                       *
 d   re_cflags                   10i 0
 d   re_erroff                   10i 0
 d   re_len                      10i 0
 d   re_ucoll                    10i 0 dim(2)
 d   re_lsub                       *   DIM(REG_SUBEXP_MAX)
 d   re_esub                       *   DIM(REG_SUBEXP_MAX)
 d   re_map                     256a
 d   re_shift                     5i 0
 d   re_dbcs                      5i 0

  **------------------------------------------------------------
  *  structure used to report matches found by regexec()
  *
  *    rm_so = zero-based offset where the match starts
  *    rm_eo = zero-based offset just past the end of the match,
  *            so rm_eo - rm_so is the length of the match
  *    rm_ss, rm_es = presumably the shift state at the start
  *            and end of the match -- confirm against the
  *            ILE C <regex.h> before relying on them
  *
  *  Declared BASED(template): use LIKEDS(regmatch_t), usually
  *  with DIM(), to declare the storage handed to regexec().
  **------------------------------------------------------------
 d regmatch_t      ds                  qualified align based(template)
 d   rm_so                       10i 0
 d   rm_ss                        5i 0
 d   rm_eo                       10i 0
 d   rm_es                        5i 0

  **------------------------------------------------------------
  * regcomp() -- Compile a Regular Expression ("RE")
  *
  *     int regcomp(regex_t *preg, const char *pattern,
  *              int cflags);
  *
  * where:
  *       preg (output) = the compiled regular expression.
  *    pattern (input)  = the RE to be compiled.
  *     cflags (input)  = the sum of the cflag constants
  *                       (listed above) for this RE.
  *
  * Returns 0 = success, otherwise an error number
  *         (one of the REG_xxx error constants).
  *
  * pattern is declared OPTIONS(*STRING), so an RPG character
  * value can be passed directly; it reaches the C function as
  * a pointer to a null-terminated string.
  **------------------------------------------------------------
 d regcomp         pr            10i 0 extproc('regcomp')
 d   preg                              like(regex_t)
 d   pattern                       *   value options(*string)
 d   cflags                      10i 0 value

  **------------------------------------------------------------
  * regexec() -- Execute a compiled Regular Expression ("RE")
  *
  *     int regexec(const regex_t *preg, const char *string,
  *              size_t nmatch, regmatch_t *pmatch, int eflags);
  *
  * where:
  *       preg (input)  = the compiled regular expression
  *                       (the output of regcomp())
  *     string (input)  = string to run the RE upon
  *     nmatch (input)  = the number of matches to return.
  *     pmatch (output) = array of regmatch_t DS's
  *                       showing what matches were found.
  *     eflags (input)  = the sum of the flags (constants
  *                       provided above) modifying the RE
  *
  * Returns 0 = success, otherwise an error number
  *         (REG_NOMATCH when the pattern was not found).
  *
  * string is declared OPTIONS(*STRING), so an RPG character
  * value can be passed directly.  pmatch is a pointer passed
  * by value: pass %ADDR() of a LIKEDS(regmatch_t) structure
  * or array.
  **------------------------------------------------------------
 d regexec         pr            10i 0 extproc('regexec')
 d   preg                              like(regex_t) const
 d   string                        *   value options(*string)
 d   nmatch                      10u 0 value
 d   pmatch                        *   value
 d   eflags                      10i 0 value

  **------------------------------------------------------------
  * regerror() -- return error information from regcomp/regexec
  *
  *   size_t regerror(int errcode, const regex_t *preg,
  *              char *errbuf, size_t errbuf_size);
  *
  *  where:
  *    errcode (input)  = the error code to return info on
  *                      (obtained as the return value from
  *                      either regcomp() or regexec())
  *       preg (input)  = the (compiled) RE to return the
  *                      error for.
  *     errbuf (output) = buffer containing human-readable
  *                      error message.  Passed as a pointer:
  *                      use %ADDR() of a character field.
  * errbuf_size (input) = size of errbuf (max length of msg
  *                      that will be returned)
  *
  * returns:  length of buffer needed to get entire error msg
  *
  * errbuf_size is size_t in the C prototype, so it is declared
  * unsigned (10u 0) like the other size_t values in this copy
  * book (regexec's nmatch and this procedure's return value).
  **------------------------------------------------------------
 d regerror        pr            10u 0 extproc('regerror')
 d   errcode                     10i 0 value
 d   preg                              like(regex_t) const
 d   errbuf                        *   value
 d   errbuf_size                 10u 0 value

  **------------------------------------------------------------
  * regfree() -- free memory locked by Regular Expression
  *
  *    void regfree(regex_t *preg);
  *
  *   where:
  *        preg (input) = regular expression to free mem for.
  *                       This is the structure that was filled
  *                       in by regcomp().
  *
  *   NOTE:  regcomp() will always allocate extra memory
  *        to be pointed to by the various pointers in
  *        the regex_t structure.  if you don't call this,
  *        that memory will never be returned to the system!
  *
  *   Returns nothing (the prototype has no return value).
  **------------------------------------------------------------
 d regfree         pr                  extproc('regfree')
 d   preg                              like(regex_t)

这是输出：

DSPLY  My phone #'s are: (444) 555 - 6666 and 777.888.9999
DSPLY  My phone #'s are: (***) *** - 6666 and ***.***.9999

可以通过提取替换逻辑并将其放入自己的过程中来改进代码，创建基于 POSIX 库的自定义正则表达式替换函数，但这不是绝对必要的。

score 3 · Accepted Answer

ILE C/C++运行时库没有可用的正则表达式替换功能。

然而，Java 对正则表达式有很好的支持，并且很容易与 RPGLE 集成。

score 3 · Accepted Answer

我成功地将 Regex 与 Java 一起使用。我受到scott klement 的这段代码和ibm的这段代码的启发。混合效果很好。我刚刚添加了替换功能。

 H

  * Example: regular-expression match and replace done through
  * java.util.regex, called from RPG via Java method prototypes.
  /include QSYSINC/QRPGLESRC,JNI
  * java.lang.String constructor: builds a Java String from an
  * RPG character value.
 D newString       pr              O   CLASS(*JAVA:'java.lang.String')
 D                                     EXTPROC(*JAVA:'java.lang.String':
 D                                     *CONSTRUCTOR)
 D    bytearray               32767A   VARYING CONST
  * java.lang.String.getBytes: returns the String's content as
  * an RPG varying character value.
 D getBytes        PR         65535A    VARYING
 D                                      EXTPROC(*JAVA:
 D                                       'java.lang.String':
 D                                       'getBytes')
  * Static method java.util.regex.Pattern.compile: takes the
  * pattern String and returns a Pattern object.
 D PatternCompile  pr              O   CLASS(*JAVA:
 D                                     'java.util.regex.Pattern')
 D                                     EXTPROC(*JAVA:
 D                                     'java.util.regex.Pattern':
 D                                     'compile') STATIC
 D    pattern                      O   CLASS(*JAVA:'java.lang.String')
  * java.util.regex.Pattern.matcher: returns a Matcher for the
  * given input.  Not static, so the Pattern object is passed as
  * the first argument when it is called.
 D PatternMatcher  pr              O   CLASS(*JAVA:
 D                                     'java.util.regex.Matcher')
 D                                     EXTPROC(*JAVA:
 D                                     'java.util.regex.Pattern':
 D                                     'matcher')
 D    comparestr                   O   CLASS(*JAVA
 D                                     :'java.lang.CharSequence')
  * java.util.regex.Matcher.matches: returns an indicator.
 D CheckMatches    pr             1N   EXTPROC(*JAVA
 D                                     :'java.util.regex.Matcher'
 D                                     :'matches')
  * java.util.regex.Matcher.replaceAll: takes the replacement
  * String and returns the resulting String.
 D DoReplace       pr              O   CLASS(*JAVA:'java.lang.String')
 D                                     EXTPROC(*JAVA
 D                                     :'java.util.regex.Matcher'
 D                                     :'replaceAll')
 D    replacement                  O   CLASS(*JAVA
 D                                     :'java.lang.String')
  * Java object references used by the calculations below.
  * jstring is presumably defined by the JNI copy member
  * included above -- confirm.
 D RegExPattern    s               O   CLASS(*JAVA:
 D                                      'java.util.regex.Pattern')
 D RegExMatcher    s               O   CLASS(*JAVA:
 D                                     'java.util.regex.Matcher')
 D jstrStmt        s                   like(jstring)
 D jPatStr         s                   like(jstring)
 D jRepStr         s                   like(jstring)
 D jRepStr2        s                   like(jstring)
 D result          S             30A   
  /free
    // Wrap the pattern, the input and the replacement text
    // as Java String objects.
    jPatStr = newString('^(\+33|0)([1-9][0-9]{8})$');
    jstrStmt = newString('+33123456789');
    jRepStr = newString('0$2');
    // Compile the pattern and bind a Matcher to the input.
    RegExPattern = PatternCompile(jPatStr);
    RegExMatcher = PatternMatcher(RegExPattern : jstrStmt);
    // Report whether Matcher.matches() accepted the input.
    if (CheckMatches(RegExMatcher) = *ON);
        dsply ('it matches');
    else;
        dsply ('it doesn''t match');
    endif;
    // replaceAll() is called whatever the outcome above; in the
    // replacement text, $2 stands for the second capture group.
    jRepStr2 = DoReplace(RegExMatcher : jRepStr);
    // Copy the Java result back into the 30-byte RPG field.
    result = getBytes(jRepStr2);

    dsply (%subst(result : 1 : 30));
    *inlr = *on;
  /end-free

它可以工作，但使用 Java。我仍在研究 WarrenT 建议的 PASE 解决方案，但是在 ILE 程序中使用 PASE 实在是太痛苦了……

score 2 · Accepted Answer

Young i Professionals Wiki有一个Open Source Binaries页面。列表中是PCRE 库（Perl 兼容的正则表达式）。

请告诉我们进展如何。我可能也会自己试试 ;-)

score 0 · Accepted Answer

有关优秀的 SQLRPGLE 示例和说明，请参阅：

https://www.rpgpgm.com/2017/10/replacing-parts-of-strings-using-regexp.html

REGEXP_REPLACE（源字符串、模式表达式、替换字符串、开始、出现、标志）

c++ - IBMi 上的正则表达式替换

5 回答 5

Related

Reference