我有一个工作正则表达式来解析匹配每个有效拼音但不匹配无效拼音的拼音。我想知道如何优化它。
^(?P<initial>ch|zh|sh|r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z|)
(?P<final>
(?:(?<=ch)uang|(?<=ch)ang|(?<=ch)eng|(?<=ch)ong|(?<=ch)uai|(?<=ch)uan|(?<=ch)ai|(?<=ch)an|(?<=ch)ao|(?<=ch)en|(?<=ch)ou|(?<=ch)ua|(?<=ch)ui|(?<=ch)un|(?<=ch)uo|(?<=ch)a|(?<=ch)e|(?<=ch)i|(?<=ch)u)
|(?:(?<=zh)uang|(?<=zh)ang|(?<=zh)eng|(?<=zh)ong|(?<=zh)uai|(?<=zh)uan|(?<=zh)ai|(?<=zh)an|(?<=zh)ao|(?<=zh)ei|(?<=zh)en|(?<=zh)ou|(?<=zh)ua|(?<=zh)ui|(?<=zh)un|(?<=zh)uo|(?<=zh)a|(?<=zh)e|(?<=zh)i|(?<=zh)u)
|(?:(?<=sh)uang|(?<=sh)ang|(?<=sh)eng|(?<=sh)uai|(?<=sh)uan|(?<=sh)ai|(?<=sh)an|(?<=sh)ao|(?<=sh)ei|(?<=sh)en|(?<=sh)ou|(?<=sh)ua|(?<=sh)ui|(?<=sh)un|(?<=sh)uo|(?<=sh)a|(?<=sh)e|(?<=sh)i|(?<=sh)u)
|(?:(?<=c)ang|(?<=c)eng|(?<=c)ong|(?<=c)uan|(?<=c)ai|(?<=c)an|(?<=c)ao|(?<=c)en|(?<=c)ou|(?<=c)ui|(?<=c)un|(?<=c)uo|(?<=c)a|(?<=c)e|(?<=c)i|(?<=c)u)
|(?:(?<=b)ang|(?<=b)eng|(?<=b)ian|(?<=b)iao|(?<=b)ing|(?<=b)ai|(?<=b)an|(?<=b)ao|(?<=b)ei|(?<=b)en|(?<=b)ie|(?<=b)in|(?<=b)a|(?<=b)i|(?<=b)o|(?<=b)u)
|(?:(?<=d)ang|(?<=d)eng|(?<=d)ian|(?<=d)iao|(?<=d)ing|(?<=d)ong|(?<=d)uan|(?<=d)ai|(?<=d)an|(?<=d)ao|(?<=d)ei|(?<=d)en|(?<=d)ia|(?<=d)ie|(?<=d)iu|(?<=d)ou|(?<=d)ui|(?<=d)un|(?<=d)uo|(?<=d)a|(?<=d)e|(?<=d)i|(?<=d)u)
|(?:(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)a|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ai
|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)an|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ang
|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ao|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)e
为了便于阅读,上面是一个缩写版本。整个表达式可以在这篇文章的末尾找到。
我特别想知道是否将两个或多个前缀传递给结束匹配器会提高性能:
(<=ch|zh|sh)uang|(<=ch|zh|sh)ang...
感谢您的时间和建议。
整个正则表达式:
^(?P<initial>ch|zh|sh|r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z|)(?P<final>(?:(?<=ch)uang|(?<=ch)ang|(?<=ch)eng|(?<=ch)ong|(?<=ch)uai|(?<=ch)uan|(?<=ch)ai|(?<=ch)an|(?<=ch)ao|(?<=ch)en|(?<=ch)ou|(?<=ch)ua|(?<=ch)ui|(?<=ch)un|(?<=ch)uo|(?<=ch)a|(?<=ch)e|(?<=ch)i|(?<=ch)u)|(?:(?<=zh)uang|(?<=zh)ang|(?<=zh)eng|(?<=zh)ong|(?<=zh)uai|(?<=zh)uan|(?<=zh)ai|(?<=zh)an|(?<=zh)ao|(?<=zh)ei|(?<=zh)en|(?<=zh)ou|(?<=zh)ua|(?<=zh)ui|(?<=zh)un|(?<=zh)uo|(?<=zh)a|(?<=zh)e|(?<=zh)i|(?<=zh)u)|(?:(?<=sh)uang|(?<=sh)ang|(?<=sh)eng|(?<=sh)uai|(?<=sh)uan|(?<=sh)ai|(?<=sh)an|(?<=sh)ao|(?<=sh)ei|(?<=sh)en|(?<=sh)ou|(?<=sh)ua|(?<=sh)ui|(?<=sh)un|(?<=sh)uo|(?<=sh)a|(?<=sh)e|(?<=sh)i|(?<=sh)u)|(?:(?<=c)ang|(?<=c)eng|(?<=c)ong|(?<=c)uan|(?<=c)ai|(?<=c)an|(?<=c)ao|(?<=c)en|(?<=c)ou|(?<=c)ui|(?<=c)un|(?<=c)uo|(?<=c)a|(?<=c)e|(?<=c)i|(?<=c)u)|(?:(?<=b)ang|(?<=b)eng|(?<=b)ian|(?<=b)iao|(?<=b)ing|(?<=b)ai|(?<=b)an|(?<=b)ao|(?<=b)ei|(?<=b)en|(?<=b)ie|(?<=b)in|(?<=b)a|(?<=b)i|(?<=b)o|(?<=b)u)|(?:(?<=d)ang|(?<=d)eng|(?<=d)ian|(?<=d)iao|(?<=d)ing|(?<=d)ong|(?<=d)uan|(?<=d)ai|(?<=d)an|(?<=d)ao|(?<=d)ei|(?<=d)en|(?<=d)ia|(?<=d)ie|(?<=d)iu|(?<=d)ou|(?<=d)ui|(?<=d)un|(?<=d)uo|(?<=d)a|(?<=d)e|(?<=d)i|(?<=d)u)|(?:(?<=g)uang|(?<=g)ang|(?<=g)eng|(?<=g)ong|(?<=g)uai|(?<=g)uan|(?<=g)ai|(?<=g)an|(?<=g)ao|(?<=g)ei|(?<=g)en|(?<=g)ou|(?<=g)ua|(?<=g)ui|(?<=g)un|(?<=g)uo|(?<=g)a|(?<=g)e|(?<=g)u)|(?:(?<=f)ang|(?<=f)eng|(?<=f)iao|(?<=f)an|(?<=f)ei|(?<=f)en|(?<=f)ou|(?<=f)a|(?<=f)o|(?<=f)u)|(?:(?<!sh|ch|zh)(?<=h)uang|(?<!sh|ch|zh)(?<=h)ang|(?<!sh|ch|zh)(?<=h)eng|(?<!sh|ch|zh)(?<=h)ong|(?<!sh|ch|zh)(?<=h)uai|(?<!sh|ch|zh)(?<=h)uan|(?<!sh|ch|zh)(?<=h)ai|(?<!sh|ch|zh)(?<=h)an|(?<!sh|ch|zh)(?<=h)ao|(?<!sh|ch|zh)(?<=h)ei|(?<!sh|ch|zh)(?<=h)en|(?<!sh|ch|zh)(?<=h)ou|(?<!sh|ch|zh)(?<=h)ua|(?<!sh|ch|zh)(?<=h)ui|(?<!sh|ch|zh)(?<=h)un|(?<!sh|ch|zh)(?<=h)uo|(?<!sh|ch|zh)(?<=h)a|(?<!sh|ch|zh)(?<=h)e|(?<!sh|ch|zh)(?<=h)u)|(?:(?<=k)uang|(?<=k)ang|(?<=k)eng|(?<=k)ong|(?<=k)uai|(?<=k)uan|(?<=k)ai|(?<=k)an|(?<=k)ao|(?<=k)en|(?<=k)ou|(?<=k)ua|(?<=k)ui|(?<=k)un|(?<=k)uo|(?<=k)a|(?<=k)e|(?<=k)u)|(?:(?<=j)iang|(?<=j)iong|(?<=j)ian|(?<=j)iao|(?<=j)ing|(?<=j)üan|(?<=j)ia|(?<=j)ie|(?<=j)in|(?<=j)iu|(?<=j)üe|(?<=j)ün|(?<=j)i|(?<=j)ü)|(?:(?<=m)ang|(?<=m)eng|(?<=m)ian|(?<=m)iao|(?<=m)ing|(?<=m)ai|(?<=m)an|(?<=m)ao|(?<=m)ei|(?<=m)en|(?<=m)ie|(?<=m)in|(?<=m)iu|(?<=m)ou|(?<=m)a|(?<=m)e|(?<=m)i|(?<=m)o|(?<=m)u)|(?:(?<=l)iang|(?<=l)ang|(?<=l)eng|(?<=l)ian|(?<=l)iao|(?<=l)ing|(?<=l)ong|(?<=l)uan|(?<=l)ai|(?<=l)an|(?<=l)ao|(?<=l)ei|(?<=l)ia|(?<=l)ie|(?<=l)in|(?<=l)iu|(?<=l)ou|(?<=l)un|(?<=l)uo|(?<=l)üe|(?<=l)a|(?<=l)e|(?<=l)i|(?<=l)o|(?<=l)u|(?<=l)ü)|(?:(?<=n)iang|(?<=n)ang|(?<=n)eng|(?<=n)ian|(?<=n)iao|(?<=n)ing|(?<=n)ong|(?<=n)uan|(?<=n)ai|(?<=n)an|(?<=n)ao|(?<=n)ei|(?<=n)en|(?<=n)ie|(?<=n)in|(?<=n)iu|(?<=n)ou|(?<=n)un|(?<=n)uo|(?<=n)üe|(?<=n)a|(?<=n)e|(?<=n)i|(?<=n)u|(?<=n)ü)|(?:(?<=q)iang|(?<=q)iong|(?<=q)ian|(?<=q)iao|(?<=q)ing|(?<=q)üan|(?<=q)ia|(?<=q)ie|(?<=q)in|(?<=q)iu|(?<=q)üe|(?<=q)ün|(?<=q)i|(?<=q)ü)|(?:(?<=p)ang|(?<=p)eng|(?<=p)ian|(?<=p)iao|(?<=p)ing|(?<=p)ai|(?<=p)an|(?<=p)ao|(?<=p)ei|(?<=p)en|(?<=p)ie|(?<=p)in|(?<=p)ou|(?<=p)a|(?<=p)i|(?<=p)o|(?<=p)u)|(?:(?<=s)ang|(?<=s)eng|(?<=s)ong|(?<=s)uan|(?<=s)ai|(?<=s)an|(?<=s)ao|(?<=s)en|(?<=s)ou|(?<=s)ui|(?<=s)un|(?<=s)uo|(?<=s)a|(?<=s)e|(?<=s)i|(?<=s)u)|(?:(?<=r)ang|(?<=r)eng|(?<=r)ong|(?<=r)uan|(?<=r)an|(?<=r)ao|(?<=r)en|(?<=r)ou|(?<=r)ua|(?<=r)ui|(?<=r)un|(?<=r)uo|(?<=r)e|(?<=r)i|(?<=r)u)|(?:(?<=t)ang|(?<=t)eng|(?<=t)ian|(?<=t)iao|(?<=t)ing|(?<=t)ong|(?<=t)uan|(?<=t)ai|(?<=t)an|(?<=t)ao|(?<=t)ei|(?<=t)ie|(?<=t)ou|(?<=t)ui|(?<=t)un|(?<=t)uo|(?<=t)a|(?<=t)e|(?<=t)i|(?<=t)u)|(?:(?<=w)ang|(?<=w)eng|(?<=w)ai|(?<=w)an|(?<=w)ei|(?<=w)en|(?<=w)a|(?<=w)o|(?<=w)u)|(?:(?<=y)ang|(?<=y)ing|(?<=y)ong|(?<=y)uan|(?<=y)ai|(?<=y)an|(?<=y)ao|(?<=y)in|(?<=y)ou|(?<=y)ue|(?<=y)un|(?<=y)a|(?<=y)e|(?<=y)e|(?<=y)i|(?<=y)o|(?<=y)u)|(?:(?<=x)iang|(?<=x)iong|(?<=x)ian|(?<=x)iao|(?<=x)ing|(?<=x)üan|(?<=x)ia|(?<=x)ie|(?<=x)in|(?<=x)iu|(?<=x)üe|(?<=x)ün|(?<=x)i|(?<=x)ü)|(?:(?<=z)ang|(?<=z)eng|(?<=z)ong|(?<=z)uan|(?<=z)ai|(?<=z)an|(?<=z)ao|(?<=z)ei|(?<=z)en|(?<=z)ou|(?<=z)ui|(?<=z)un|(?<=z)uo|(?<=z)a|(?<=z)e|(?<=z)i|(?<=z)u)|(?:(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)a|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ai|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)an|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ang|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ao|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)e|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ei|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)en|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)eng|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)er|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)o|(?<!r|c|b|d|g|f|h|k|j|m|l|n|q|p|s|t|w|y|x|z)ou))$