我使用一种方法,将名称中的字符更改为与英文名称使用频率相同“范围”内的其他字符。显然,姓名中字符的分布与普通会话英语不同。例如,“x”和“z”出现 0.245% 的时间,因此它们被交换。另一个极端,“w”的使用率为 5.5%,“s”为 6.86%,“t”为 15.978%。我将“s”更改为“w”,将“t”更改为“s”,将“w”更改为“t”。我将元音“aeio”放在一个单独的组中,以便一个元音只被另一个元音替换。类似地,“q”、“u”和“y”根本不会被替换。我的分组和决定完全是主观的。
我最终得到了 7 个不同的 2-5 个字符的“组”,主要基于频率。每个组中的字符与同一组中的其他字符交换。
最终结果是名称有点像可能是名称,但来自“不在此处”。
Original name Morphed name
Loren Nimag
Juanita Kuogewso
Tennyson Saggywig
David Mijsm
Julie Kunewa
这是我使用的 SQL,其中包括一个“TitleCase”函数。根据我在网上找到的不同字母频率,有 2 个不同版本的“变形”名称。
-- from https://stackoverflow.com/a/28712621
-- Convert and return param as Title Case
CREATE FUNCTION [dbo].[fnConvert_TitleCase] (@InputString VARCHAR(4000) )
RETURNS VARCHAR(4000)AS
BEGIN
DECLARE @Index INT
DECLARE @Char CHAR(1)
DECLARE @OutputString VARCHAR(255)
SET @OutputString = LOWER(@InputString)
SET @Index = 2
SET @OutputString = STUFF(@OutputString, 1, 1,UPPER(SUBSTRING(@InputString,1,1)))
WHILE @Index <= LEN(@InputString)
BEGIN
SET @Char = SUBSTRING(@InputString, @Index, 1)
IF @Char IN (' ', ';', ':', '!', '?', ',', '.', '_', '-', '/', '&','''','(','{','[','@')
IF @Index + 1 <= LEN(@InputString)
BEGIN
IF @Char != '''' OR UPPER(SUBSTRING(@InputString, @Index + 1, 1)) != 'S'
SET @OutputString = STUFF(@OutputString, @Index + 1, 1,UPPER(SUBSTRING(@InputString, @Index + 1, 1)))
END
SET @Index = @Index + 1
END
RETURN ISNULL(@OutputString,'')
END
Go
-- 00.045 x 0.045%
-- 00.045 z 0.045%
--
-- Replace(Replace(Replace(TS_NAME,'x','#'),'z','x'),'#','z')
--
-- 00.456 k 0.456%
-- 00.511 j 0.511%
-- 00.824 v 0.824%
-- kjv
-- Replace(Replace(Replace(Replace(TS_NAME,'k','#'),'j','k'),'v','j'),'#','v')
--
-- 01.642 g 1.642%
-- 02.284 n 2.284%
-- 02.415 l 2.415%
-- gnl
-- Replace(Replace(Replace(Replace(TS_NAME,'g','#'),'n','g'),'l','n'),'#','l')
--
-- 02.826 r 2.826%
-- 03.174 d 3.174%
-- 03.826 m 3.826%
-- rdm
-- Replace(Replace(Replace(Replace(TS_NAME,'r','#'),'d','r'),'m','d'),'#','m')
--
-- 04.027 f 4.027%
-- 04.200 h 4.200%
-- 04.319 p 4.319%
-- 04.434 b 4.434%
-- 05.238 c 5.238%
-- fhpbc
-- Replace(Replace(Replace(Replace(Replace(Replace(TS_NAME,'f','#'),'h','f'),'p','h'),'b','p'),'c','b'),'#','c')
--
-- 05.497 w 5.497%
-- 06.686 s 6.686%
-- 15.978 t 15.978%
-- wst
-- Replace(Replace(Replace(Replace(TS_NAME,'w','#'),'s','w'),'t','s'),'#','t')
--
--
-- 02.799 e 2.799%
-- 07.294 i 7.294%
-- 07.631 o 7.631%
-- 11.682 a 11.682%
-- eioa
-- Replace(Replace(Replace(Replace(Replace(TS_NAME,'e','#'),'i','ew'),'o','i'),'a','o'),'#','a')
--
-- -- dont replace
-- 00.222 q 0.222%
-- 00.763 y 0.763%
-- 01.183 u 1.183%
-- Obfuscate a name
Select
ts_id,
Cast(ts_name as varchar(42)) as [Original Name]
Cast(dbo.fnConvert_TitleCase(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(TS_NAME,'x','#'),'z','x'),'#','z'),'k','#'),'j','k'),'v','j'),'#','v'),'g','#'),'n','g'),'l','n'),'#','l'),'r','#'),'d','r'),'m','d'),'#','m'),'f','#'),'h','f'),'p','h'),'b','p'),'c','b'),'#','c'),'w','#'),'s','w'),'t','s'),'#','t'),'e','#'),'i','ew'),'o','i'),'a','o'),'#','a')) as VarChar(42)) As [morphed name] ,
Cast(dbo.fnConvert_TitleCase(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(Replace(TS_NAME,'e','t'),'~','e'),'t','~'),'a','o'),'~','a'),'o','~'),'i','n'),'~','i'),'n','~'),'s','h'),'~','s'),'h','r'),'r','~'),'d','l'),'~','d'),'l','~'),'m','w'),'~','m'),'w','f'),'f','~'),'g','y'),'~','g'),'y','p'),'p','~'),'b','v'),'~','b'),'v','k'),'k','~'),'x','~'),'j','x'),'~','j')) as VarChar(42)) As [morphed name2]
From
ts_users
;