-1

我的客户将姓名数据作为名称字符串发送给我,其中包括单个条目中的姓氏、名字和中间名。我需要将它们分成姓氏、名字和中间名。我在网上找到了一些脚本,但它们不能满足我的目的,因为它们要么 (1) 使用不同的格式,要么 (2) 不能很好地处理边缘情况。请参阅以下示例:

  1. 南丁格尔,佛罗伦萨 -> 佛罗伦萨南丁格尔
  2. 邦德,詹姆斯邦德 -> 詹姆斯邦德邦德
  3. 雅培,埃德温 A. -> 埃德温 A. 雅培

有人可以帮我编写一个 SQL Server 脚本,将一个字符串拆分成我正在寻找的各个部分吗?

4

5 回答 5

4

请注意以下事项:

  1. 始终请求标准化数据以确保最高的数据质量。我试图列举姓氏、名字和中间名组合的所有可能情况,但我确信我没有得到所有这些情况。
  2. 我的脚本需要以下格式:LastName@DELIMITER1@DELIMITER2FirstName@DELIMITER2MiddleName,但可以轻松更改为其他格式。
  3. 此脚本不会分离 Dr. 之类的头衔(称谓),也不会处理后缀。
  4. 感谢 MemKills 对我扩展的测试数据集的想法。

>

-- Splits names of the form LastName<@DELIMITER1><@DELIMITER2>FirstName[<@DELIMITER2>MiddleName]
-- (for example 'Zzz, Aaaa B') into LastName, FirstName and MiddleName.
--   * LastName       : text before the first @DELIMITER1.
--   * FirstAndMiddle : up to @MAX_LENGTH characters after the two delimiters.
--   * MiddleName     : token after the final @DELIMITER2 of FirstAndMiddle, NULL if there is none.
--   * FirstName      : FirstAndMiddle without the middle token and without the delimiter before it.
-- Titles (Dr.) and suffixes (Jr.) are not recognised.
-- A row that contains no @DELIMITER1 yields NULL name parts instead of aborting the whole
-- query with an "invalid length parameter" error.
DECLARE @DELIMITER1 varchar(1), @DELIMITER2 varchar(1), @MAX_LENGTH int
SET @DELIMITER1 = ','    -- ends the last name
SET @DELIMITER2 = ' '    -- follows @DELIMITER1 and separates the given-name tokens
SET @MAX_LENGTH = 50     -- maximum number of characters read after the delimiters

SELECT  X.[Name],
    LEFT(X.[Name], D.Delimiter1Pos - 1) AS LastName,        -- NULL position => NULL last name
    G.FirstAndMiddle,
    CASE
        -- A @DELIMITER2 inside the given-name part means the final token is the middle name.
        WHEN S.LastDelimiter2FromEnd > 0
            THEN RIGHT(G.Trimmed, S.LastDelimiter2FromEnd - 1)
        ELSE NULL
    END AS MiddleName,
    CASE
        -- Keep everything before the final @DELIMITER2 (the delimiter itself is dropped).
        WHEN S.LastDelimiter2FromEnd > 0
            THEN LEFT(G.Trimmed, LEN(G.Trimmed) - S.LastDelimiter2FromEnd)
        ELSE G.FirstAndMiddle
    END AS FirstName
FROM
(
    -- Sample input; replace this derived table with the real source of [Name].
    SELECT  [Name] = 'Zzz, A' UNION ALL
    SELECT  'de Zzz, Aaa' UNION ALL
    SELECT  'Zzz, Aaaa' UNION ALL
    SELECT  'Zzz, A B' UNION ALL
    SELECT  'Zzz, Aaaa Bbbb' UNION ALL
    SELECT  'de Zzz, Aaaa' UNION ALL
    SELECT  'de Zzz, Aaaa B' UNION ALL
    SELECT  'van Zzz, Aaaa B' UNION ALL
    SELECT  'Yyy-Zzz, Aaaa B' UNION ALL
    SELECT  'd''Zzz, Aaaa B' UNION ALL
    SELECT  'Zzz, Aaaa Bbbb C' UNION ALL
    SELECT  'Zzz, Aaaa Bbbb Cccc'
) AS X
CROSS APPLY
(
    -- CHARINDEX returns 0 when the delimiter is absent; NULLIF turns that into NULL so
    -- every dependent expression becomes NULL rather than receiving a negative length.
    SELECT NULLIF(CHARINDEX(@DELIMITER1, X.[Name]), 0) AS Delimiter1Pos
) AS D
CROSS APPLY
(
    -- Plus two: skip @DELIMITER1 and the @DELIMITER2 that follows it.
    -- Trimmed drops trailing blanks so LEN (which ignores them) and REVERSE agree.
    SELECT SUBSTRING(X.[Name], D.Delimiter1Pos + 2, @MAX_LENGTH)        AS FirstAndMiddle,
           RTRIM(SUBSTRING(X.[Name], D.Delimiter1Pos + 2, @MAX_LENGTH)) AS Trimmed
) AS G
CROSS APPLY
(
    -- 1-based distance of the final @DELIMITER2 from the end of Trimmed; 0 when absent.
    SELECT CHARINDEX(@DELIMITER2, REVERSE(G.Trimmed)) AS LastDelimiter2FromEnd
) AS S
于 2014-08-05T15:30:23.297 回答
1

试试这个代码。我觉得效率更高一些。请随时修改或改进它。谢谢。


-- Splits a "[Title] First [Middle ...] Last [Suffix]" name held in @FullName into
-- @FirstName, @MiddleInitial and @LastName and returns them as one row.
--   * One leading title (Mr/Mrs/Ms/Miss/Dr, with or without a period) and one trailing
--     suffix (Jr/Sr/II/III) are removed. They are matched only at the start / end of the
--     string, so the same letters inside a name are left alone.
--   * First name = text before the first space; last name = text after the last space.
--   * Middle initial = first character of whatever lies between those two spaces
--     ('' when there is nothing between them, NULL when the name has no space at all).
DECLARE @FullName VARCHAR(60),
        @FirstName VARCHAR(30),
        @LastName VARCHAR(30),

        @MiddleInitialPrep VARCHAR(60) = null,
        @MiddleInitial VARCHAR(1) = null

DECLARE @Affix VARCHAR(5),      -- title or suffix found on the current name, if any
        @FirstSpace INT,        -- position of the first space in the cleaned name
        @LastSpace INT          -- position of the last space in the cleaned name

SET @FullName = 'Dr. John Edward Doe III'

-- NAME CLEAN UP TO REMOVE PREFIXES AND SUFFIXES
SET @FullName = LTRIM(RTRIM(@FullName))

-- The trailing ' %' requires the title to be a whole leading word, so at most one row matches.
SELECT @Affix = T.Title
FROM (VALUES ('Mr.'), ('Mr'), ('Mrs.'), ('Mrs'), ('Ms.'), ('Ms'), ('Miss'), ('Dr.'), ('Dr')) AS T (Title)
WHERE @FullName LIKE T.Title + ' %'

IF @Affix IS NOT NULL
    SET @FullName = LTRIM(STUFF(@FullName, 1, LEN(@Affix), ''))

-- The leading '% ' requires the suffix to be a whole trailing word, so at most one row matches.
SET @Affix = NULL
SELECT @Affix = S.Suffix
FROM (VALUES ('Jr.'), ('Jr'), ('Sr.'), ('Sr'), ('III'), ('II')) AS S (Suffix)
WHERE @FullName LIKE '% ' + S.Suffix

IF @Affix IS NOT NULL
    SET @FullName = RTRIM(LEFT(@FullName, LEN(@FullName) - LEN(@Affix)))

-- RETRIEVE FIRST AND LAST NAMES
SET @FirstSpace = CHARINDEX(' ', @FullName)
SET @LastSpace = LEN(@FullName) - CHARINDEX(' ', REVERSE(@FullName)) + 1

-- No space at all: first name is NULL and the whole string is the last name.
SET @FirstName = LEFT(@FullName, NULLIF(@FirstSpace - 1, -1))
SET @LastName = RIGHT(@FullName, ISNULL(NULLIF(CHARINDEX(' ', REVERSE(@FullName)) - 1, -1), LEN(@FullName)))

-- ISOLATE MIDDLE INITIAL
-- Slice by position (first space through last space) instead of REPLACE-ing the first and
-- last name out of the string: REPLACE removes every occurrence, which corrupts names
-- such as 'John Johnson' where the first name reappears inside the last name.
SET @MiddleInitialPrep = CASE
                             WHEN @FirstSpace > 0
                                 THEN SUBSTRING(@FullName, @FirstSpace, @LastSpace - @FirstSpace + 1)
                             ELSE NULL
                         END
SET @MiddleInitial = LEFT(REPLACE(@MiddleInitialPrep, ' ', ''), 1)

SELECT @FirstName First_Name, @MiddleInitial Middle_Initial, @LastName Last_Name
于 2014-11-04T16:14:23.987 回答
1

下面的代码适用于“姓氏, 名字 M”格式(M 为中间名首字母)的姓名字符串。请将 “Name” 替换为您的姓名字符串列名。如果中间名首字母后面还带有一个句点作为最后一个字符,则需要把第 2、6 和 8 行中的 2 都改为 3,并把第 8 行的 “RIGHT(Name, 1)” 改为 “RIGHT(Name, 2)”。

SELECT  SUBSTRING(Name, 1, CHARINDEX(',', Name) - 1) LastName ,   -- Splits 'Last, First M': LastName is the text before the first comma. NOTE(review): with no comma CHARINDEX is 0 and the length becomes -1 -- confirm every row has a comma.
    CASE WHEN LEFT(RIGHT(Name, 2), 1) <> ' '   -- second-to-last character is not a space: no trailing one-letter initial
         THEN LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99))   -- everything after the comma (at most 99 characters), leading spaces removed
         ELSE LEFT(LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99)),   -- otherwise take the same after-comma text ...
                   LEN(LTRIM(SUBSTRING(Name, CHARINDEX(',', Name) + 1, 99)))
                   - 2)   -- ... minus its final two characters (the space and the initial). NOTE(review): a one-letter first name with no initial ('Zzz, A') also reaches this branch and makes the length negative -- confirm inputs.
    END FirstName ,
    CASE WHEN LEFT(RIGHT(Name, 2), 1) = ' ' THEN RIGHT(Name, 1)   -- the last character is returned as the middle initial
         ELSE NULL   -- no middle initial detected
    END MiddleName
于 2015-06-10T02:02:32.617 回答
0

很好的解决方案。我做了一些修改以适应我的情况:分隔符是空格,中间名只是一个首字母(有时不存在)。以下解决方案甚至可以解析包含多个空格、且没有中间名首字母的姓名,例如:“Jo Ann Taylor Haynes”。

-- Space-delimited variant of the name splitter.
--   * LastName       : text before the first @DELIMITER1.
--   * FirstAndMiddle : up to @MAX_LENGTH characters after that delimiter.
--   * When FirstAndMiddle contains exactly one @DELIMITER2 it is split there into
--     FirstName and MiddleName; otherwise it is returned whole as FirstName and
--     MiddleName is NULL (so multi-word values such as 'Ann Taylor Haynes' stay together).
-- A row that contains no @DELIMITER1 yields NULL name parts instead of an
-- "invalid length parameter" error.
DECLARE @DELIMITER1 varchar(1), @DELIMITER2 varchar(1), @MAX_LENGTH int
SET @DELIMITER1 = ' '
SET @DELIMITER2 = ' '
SET @MAX_LENGTH = 50

SELECT  X.[Name],
    LEFT(X.[Name], D.Delimiter1Pos - 1) AS LastName,
    R.FirstAndMiddle,
    CASE
        -- Text after the single @DELIMITER2, without the delimiter itself.
        WHEN C.Delimiter2Count = 1
            THEN SUBSTRING(R.FirstAndMiddle, CHARINDEX(@DELIMITER2, R.FirstAndMiddle) + 1, @MAX_LENGTH)
        ELSE NULL
    END AS MiddleName,
    CASE
        -- Text before the single @DELIMITER2.
        WHEN C.Delimiter2Count = 1
            THEN LEFT(R.FirstAndMiddle, CHARINDEX(@DELIMITER2, R.FirstAndMiddle) - 1)
        ELSE R.FirstAndMiddle
    END AS FirstName
FROM
(
    -- Sample input; replace this derived table with the real source of [Name].
    SELECT  [Name] = 'Zzz Aaaa' UNION ALL
    SELECT  'Zzz Aaaa B' UNION ALL
    SELECT  'Jo Ann Taylor Haynes'
) AS X
CROSS APPLY
(
    -- CHARINDEX returns 0 when the delimiter is absent; NULLIF turns that into NULL so
    -- every dependent expression becomes NULL rather than receiving a negative length.
    SELECT NULLIF(CHARINDEX(@DELIMITER1, X.[Name]), 0) AS Delimiter1Pos
) AS D
CROSS APPLY
(
    -- Plus one: skip the single-character @DELIMITER1.
    SELECT SUBSTRING(X.[Name], D.Delimiter1Pos + 1, @MAX_LENGTH) AS FirstAndMiddle
) AS R
CROSS APPLY
(
    -- Number of @DELIMITER2 characters: length lost when they are removed.
    SELECT LEN(R.FirstAndMiddle) - LEN(REPLACE(R.FirstAndMiddle, @DELIMITER2, '')) AS Delimiter2Count
) AS C
于 2017-06-13T15:37:19.040 回答
-4
-- Oracle: splits a space-separated full name into first (fname), middle (mname) and
-- last (lname) parts, none of which include the separating spaces.
--   * three or more words : fname = 1st word, mname = 2nd word, lname = the remainder
--   * two words           : fname = 1st word, mname = null,     lname = 2nd word
--   * one word            : fname = the word, mname and lname = null
with src as (
    -- sample input; replace with the real source of full_name
    select 'santhosh kumar kota' as full_name
    from dual
),
pos as (
    -- instr returns 0 when the requested occurrence of the space does not exist
    select full_name,
           instr(full_name, ' ', 1, 1) as sp1,   -- position of the first space
           instr(full_name, ' ', 1, 2) as sp2    -- position of the second space
    from src
)
select case
           when sp1 > 0 then substr(full_name, 1, sp1 - 1)
           else full_name
       end as fname
      ,case
           when sp2 > 0 then substr(full_name, sp1 + 1, sp2 - sp1 - 1)
           else null
       end as mname
      ,case
           when sp2 > 0 then substr(full_name, sp2 + 1)
           when sp1 > 0 then substr(full_name, sp1 + 1)
           else null
       end as lname
from pos
/
于 2017-06-20T18:10:30.700 回答