这是一个相当复杂的问题,遗憾的是没有简单的解决方案。
不过,您可以尝试下面这个正则表达式,尽管它远非完美:
^.*?(?<address>(?:\b(?:[a-zA-Z0-9.,:;\\\/#-]|\s(?=\S))*?(?<zip>\d{5}(?:-\d{4}|-\d{6})?)?\b)?)\s{2,}(?<city>\b(?:\w|\s(?=\S))+\b)\s{1,}(?<state>\b\w{2,3}\b)(?:$|\r|\n)
在图像中,第 1 组 = 地址;第 2 组 = 邮编;第 3 组 = 城市;第 4 组 = 州
输入(注意:我把 STATE 改成了 st,把 zip 改成了 12345,并把邮政信箱的 digits 改成了实际数字):
F_NAME L_NAMEFOR F_NAME L_NAME ADDRESS 12345 CITY st
ADDRESS CITY st
ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S CITY st
APT # ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S CITY st
P O BOX # 1234 ADDRESS CITY st
APT DIGIT# ADDRESS CITY st
SPACE DIGIT ADDRESS CITY st
UNIT # ADDRESS CITY st
SP DIGIT ADDRESS CITY st
DIGITS-DIGITS ADDRESS CITY st
BX DIGIT ADDRESS CITY st
ADDRESS APT # CITY st
ADDRESS UNIT # CITY st
ADDRESS P O BOX 3245 CITY st
P O B O X 123 CITY st
P O BOX 345 CITY st
ADDRESS SPACE/SP/SPC/UNIT DIGIT CITY st
匹配结果
[0] => Array
(
[0] => F_NAME L_NAMEFOR F_NAME L_NAME ADDRESS 12345 CITY st
[1] => ADDRESS CITY st
[2] => ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S CITY st
[3] => APT # ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S CITY st
[4] => P O BOX # 1234 ADDRESS CITY st
[5] => APT DIGIT# ADDRESS CITY st
[6] => SPACE DIGIT ADDRESS CITY st
[7] => UNIT # ADDRESS CITY st
[8] => SP DIGIT ADDRESS CITY st
[9] => DIGITS-DIGITS ADDRESS CITY st
[10] => BX DIGIT ADDRESS CITY st
[11] => ADDRESS APT # CITY st
[12] => ADDRESS UNIT # CITY st
[13] => ADDRESS P O BOX 3245 CITY st
[14] => P O B O X 123 CITY st
[15] => P O BOX 345 CITY st
[16] => ADDRESS SPACE/SP/SPC/UNIT DIGIT CITY st
)
[address] => Array
(
[0] => ADDRESS 12345
[1] => ADDRESS
[2] => ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S
[3] => ADDRESS EAST/WEST/NORTH/SOUTH/E/W/N/S
[4] => ADDRESS
[5] => APT DIGIT#
[6] => ADDRESS
[7] => ADDRESS
[8] => ADDRESS
[9] => DIGITS-DIGITS ADDRESS
[10] => ADDRESS
[11] => APT #
[12] => UNIT #
[13] => 3245
[14] => 123
[15] => P O BOX 345
[16] => SPACE/SP/SPC/UNIT DIGIT
)
[zip] => Array
(
[0] => 12345
[1] =>
[2] =>
[3] =>
[4] =>
[5] =>
[6] =>
[7] =>
[8] =>
[9] =>
[10] =>
[11] =>
[12] =>
[13] =>
[14] =>
[15] =>
[16] =>
)
[city] => Array
(
[0] => CITY
[1] => CITY
[2] => CITY
[3] => CITY
[4] => CITY
[5] => ADDRESS CITY
[6] => CITY
[7] => CITY
[8] => CITY
[9] => CITY
[10] => CITY
[11] => CITY
[12] => CITY
[13] => CITY
[14] => CITY
[15] => CITY
[16] => CITY
)
[state] => Array
(
[0] => st
[1] => st
[2] => st
[3] => st
[4] => st
[5] => st
[6] => st
[7] => st
[8] => st
[9] => st
[10] => st
[11] => st
[12] => st
[13] => st
[14] => st
[15] => st
[16] => st
)
推荐看看问题11160192