最好先对数据进行规范化处理。
如果做不到,请建立一张存放州名的表(即下面查询中的 state_names),然后使用如下查询:
-- Split each bad_data.col value of the form '<city> <state>' into separate
-- city and state columns, using state_names to recognise two-word states.
--
-- Reads:   bad_data(col), state_names(state)
-- Returns: col (original text), city, state
--   * A value with no space yields NULL for both city and state.
--   * Only the last one or two words are ever tried as the state, so state
--     names of three or more words are not recognised.
--   * When the candidate is not found in state_names, the last word of col is
--     taken as the state without further validation.
SELECT
    parsed.col,
    -- Cut the trailing state name off col; TRIM removes the separating space.
    -- CHAR_LENGTH (characters) is required here because SUBSTR counts
    -- characters: with the byte-based LENGTH, a multi-byte character in col
    -- would leave part of the state name attached to city.
    TRIM(
        SUBSTR(parsed.col, 1, CHAR_LENGTH(parsed.col) - CHAR_LENGTH(parsed.state))
    ) AS city,
    parsed.state AS state
FROM (
    SELECT
        candidate.col,
        -- Use the matched state name; otherwise fall back to the last word
        -- of the candidate (covers e.g. 'Corpus Christi Texas').
        COALESCE(known.state, SUBSTRING_INDEX(candidate.state, ' ', -1)) AS state
    FROM (
        SELECT
            col,
            -- Number of spaces in col decides how many trailing words to try.
            CASE
                -- exactly two words: the last word is the state candidate
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) = 1
                    THEN SUBSTRING_INDEX(col, ' ', -1)
                -- three or more words: try the last two words first
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) > 1
                    THEN SUBSTRING_INDEX(col, ' ', -2)
                ELSE NULL
            END AS state
        FROM bad_data
    ) AS candidate
    LEFT JOIN state_names AS known
        ON known.state = candidate.state
) AS parsed;
结果
| col                  | city           | state      |
|----------------------|----------------|------------|
| Spring Texas         | Spring         | Texas      |
| Corpus Christi Texas | Corpus Christi | Texas      |
| Orange California    | Orange         | California |
| New York New York    | New York       | New York   |
如果无法创建表,可以改用下面的查询,它把州名列表直接内联在查询中:
-- Split each bad_data.col value of the form '<city> <state>' into separate
-- city and state columns, without needing a lookup table: the known state
-- names are supplied inline as a derived table.
--
-- Reads:   bad_data(col)
-- Returns: col (original text), city, state
--   * A value with no space yields NULL for both city and state.
--   * Only the last one or two words are ever tried as the state, so names
--     of three or more words are not recognised.
--   * When the candidate is not in the inline list, the last word of col is
--     taken as the state without further validation.
SELECT
    parsed.col,
    -- Cut the trailing state name off col; TRIM removes the separating space.
    -- CHAR_LENGTH (characters) is required here because SUBSTR counts
    -- characters: with the byte-based LENGTH, a multi-byte character in col
    -- would leave part of the state name attached to city.
    TRIM(
        SUBSTR(parsed.col, 1, CHAR_LENGTH(parsed.col) - CHAR_LENGTH(parsed.state))
    ) AS city,
    parsed.state AS state
FROM (
    SELECT
        candidate.col,
        -- Use the matched state name; otherwise fall back to the last word
        -- of the candidate (covers e.g. 'Corpus Christi Texas').
        COALESCE(known.state, SUBSTRING_INDEX(candidate.state, ' ', -1)) AS state
    FROM (
        SELECT
            col,
            -- Number of spaces in col decides how many trailing words to try.
            CASE
                -- exactly two words: the last word is the state candidate
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) = 1
                    THEN SUBSTRING_INDEX(col, ' ', -1)
                -- three or more words: try the last two words first
                WHEN CHAR_LENGTH(col) - CHAR_LENGTH(REPLACE(col, ' ', '')) > 1
                    THEN SUBSTRING_INDEX(col, ' ', -2)
                ELSE NULL
            END AS state
        FROM bad_data
    ) AS candidate
    LEFT JOIN (
        -- Inline list of known names: the 50 US states plus Guam and
        -- Puerto Rico ('Alaska' was missing from the original list).
        SELECT 'Alabama' AS state
        UNION ALL SELECT 'Alaska'
        UNION ALL SELECT 'Arizona'
        UNION ALL SELECT 'Arkansas'
        UNION ALL SELECT 'California'
        UNION ALL SELECT 'Colorado'
        UNION ALL SELECT 'Connecticut'
        UNION ALL SELECT 'Delaware'
        UNION ALL SELECT 'Florida'
        UNION ALL SELECT 'Georgia'
        UNION ALL SELECT 'Guam'
        UNION ALL SELECT 'Hawaii'
        UNION ALL SELECT 'Idaho'
        UNION ALL SELECT 'Illinois'
        UNION ALL SELECT 'Indiana'
        UNION ALL SELECT 'Iowa'
        UNION ALL SELECT 'Kansas'
        UNION ALL SELECT 'Kentucky'
        UNION ALL SELECT 'Louisiana'
        UNION ALL SELECT 'Maine'
        UNION ALL SELECT 'Maryland'
        UNION ALL SELECT 'Massachusetts'
        UNION ALL SELECT 'Michigan'
        UNION ALL SELECT 'Minnesota'
        UNION ALL SELECT 'Mississippi'
        UNION ALL SELECT 'Missouri'
        UNION ALL SELECT 'Montana'
        UNION ALL SELECT 'Nebraska'
        UNION ALL SELECT 'Nevada'
        UNION ALL SELECT 'New Hampshire'
        UNION ALL SELECT 'New Jersey'
        UNION ALL SELECT 'New Mexico'
        UNION ALL SELECT 'New York'
        UNION ALL SELECT 'North Carolina'
        UNION ALL SELECT 'North Dakota'
        UNION ALL SELECT 'Ohio'
        UNION ALL SELECT 'Oklahoma'
        UNION ALL SELECT 'Oregon'
        UNION ALL SELECT 'Pennsylvania'
        UNION ALL SELECT 'Puerto Rico'
        UNION ALL SELECT 'Rhode Island'
        UNION ALL SELECT 'South Carolina'
        UNION ALL SELECT 'South Dakota'
        UNION ALL SELECT 'Tennessee'
        UNION ALL SELECT 'Texas'
        UNION ALL SELECT 'Utah'
        UNION ALL SELECT 'Vermont'
        UNION ALL SELECT 'Virginia'
        UNION ALL SELECT 'Washington'
        UNION ALL SELECT 'West Virginia'
        UNION ALL SELECT 'Wisconsin'
        UNION ALL SELECT 'Wyoming'
    ) AS known
        ON known.state = candidate.state
) AS parsed;