虽然我晚了 10 年才加入这个讨论,但我仍想找到一个优雅的解决方案。我通读了整个帖子,决定沿用 @eselk 的思路,并在此基础上加以扩展:
/// <summary>
/// Simple data holder for the individual components of a personal name.
/// All properties are plain read/write strings and are <c>null</c> until assigned.
/// </summary>
public class FullNameDTO
{
    /// <summary>Title or honorific preceding the name (e.g. "Dr.").</summary>
    public string Prefix { get; set; }

    /// <summary>Given (first) name.</summary>
    public string FirstName { get; set; }

    /// <summary>Middle name(s) or initial(s).</summary>
    public string MiddleName { get; set; }

    /// <summary>Family (last) name.</summary>
    public string LastName { get; set; }

    /// <summary>Trailing suffix (e.g. "Jr.", "PhD").</summary>
    public string Suffix { get; set; }
}
/// <summary>
/// Heuristic parser that breaks a free-form personal name into prefix, first,
/// middle, last and suffix components.
/// </summary>
public static class FullName
{
    // Honorifics recognised as the leading part of a name. Candidates are compared
    // after stripping '.' and ',' and lower-casing (see ToLookupKey).
    private static readonly string[] KnownPrefixes =
    {
        "mr", "mrs", "ms", "miss", "dr", "sir", "madam", "master", "fr", "rev",
        "atty", "hon", "prof", "pres", "vp", "gov", "ofc"
    };

    // Generational / professional suffixes, compared the same way as prefixes.
    private static readonly string[] KnownSuffixes =
    {
        "jr", "sr", "ii", "iii", "iv", "v", "esq", "cpa", "dc", "dds", "vm", "jd", "md", "phd"
    };

    // Particles that belong to the following surname (e.g. "da Vinci", "van Gogh").
    private static readonly string[] LastNamePrefixes =
    {
        "da", "de", "del", "dos", "el", "la", "st", "van", "von"
    };

    // Zero-width split AFTER every period, comma or space, so the delimiter stays
    // attached to the part that precedes it ("Smith," / "Jr.").
    private static readonly Regex PartSplitter = new Regex("(?<=[., ])");

    /// <summary>
    /// Parses <paramref name="fullName"/> into its components.
    /// </summary>
    /// <param name="fullName">
    /// The raw name, e.g. "Dr. John Q. Public Jr.", "Smith, John Paul" or "Smith Jr., John".
    /// <c>null</c>, empty and whitespace-only values are accepted.
    /// </param>
    /// <returns>
    /// A new <see cref="FullNameDTO"/>. Every property is non-null; components that
    /// could not be determined are <see cref="string.Empty"/>.
    /// </returns>
    public static FullNameDTO GetFullNameDto(string fullName)
    {
        var fullNameDto = new FullNameDTO
        {
            Prefix = string.Empty,
            FirstName = string.Empty,
            MiddleName = string.Empty,
            LastName = string.Empty,
            Suffix = string.Empty
        };

        // Treat null like empty/whitespace input (which already yields an empty result)
        // instead of letting Regex.Split throw.
        if (string.IsNullOrWhiteSpace(fullName))
        {
            return fullNameDto;
        }

        // Split, trim every part and drop the ones that were only whitespace.
        var nameParts = PartSplitter.Split(fullName)
            .Select(part => part.Trim())
            .Where(part => part.Length > 0)
            .ToList();

        if (nameParts.Count == 0)
        {
            return fullNameDto;
        }

        // A single token is always a first name (e.g. "Cher"), even if it looks like a title.
        if (nameParts.Count == 1)
        {
            fullNameDto.FirstName = StripCommas(nameParts[0]);
            return fullNameDto;
        }

        // Prefix: only the leading part is considered; keep its original punctuation.
        if (KnownPrefixes.Contains(ToLookupKey(nameParts[0])))
        {
            fullNameDto.Prefix = nameParts[0];
            nameParts.RemoveAt(0);
        }

        // Suffix: the first part (scanning left to right) that matches a known suffix.
        for (var index = 0; index < nameParts.Count; index++)
        {
            var candidate = nameParts[index];
            if (!KnownSuffixes.Contains(ToLookupKey(candidate)))
            {
                continue;
            }

            // A bare "JR" (no period/comma) that is not the final part is taken to be
            // someone's initials, not a suffix. The check is positional so that
            // "JR Smith JR" keeps the leading "JR" as the first name.
            var isBareJr = candidate.ToLowerInvariant() == "jr";
            if (isBareJr && index != nameParts.Count - 1)
            {
                continue;
            }

            fullNameDto.Suffix = StripCommas(candidate);
            nameParts.RemoveAt(index);
            break;
        }

        // Nothing but a prefix and/or suffix was supplied.
        if (nameParts.Count == 0)
        {
            return fullNameDto;
        }

        // One part left: without a prefix assume a first name ("Cher"),
        // with a prefix assume a last name ("Dr Jones", "Ms Jones").
        if (nameParts.Count == 1)
        {
            var onlyPart = StripCommas(nameParts[0]);
            if (fullNameDto.Prefix.Length == 0)
            {
                fullNameDto.FirstName = onlyPart;
            }
            else
            {
                fullNameDto.LastName = onlyPart;
            }

            return fullNameDto;
        }

        // "Last, First [Middle...]" or the odd "Last Suffix, First" shape, where removing
        // the suffix leaves a free-standing "," part behind.
        var leadingPartEndsWithComma = nameParts[0].EndsWith(",", System.StringComparison.Ordinal);
        var hasDetachedComma = nameParts.Count >= 3
            && nameParts.Contains(",")
            && nameParts[nameParts.Count - 1] != ",";

        if (leadingPartEndsWithComma || hasDetachedComma)
        {
            // Everything before the comma is the last name: one part for "Last,",
            // otherwise all parts up to the detached "," (covers "da Vinci" as well as
            // longer surnames such as "de la Cruz").
            var lastNamePartCount = leadingPartEndsWithComma ? 1 : nameParts.IndexOf(",");
            fullNameDto.LastName = string.Join(
                " ",
                nameParts.Take(lastNamePartCount).Select(part => StripCommas(part))).Trim();

            // What follows the comma is the first name plus any middle names. Stray
            // commas are stripped and comma-only parts are discarded.
            var givenNames = nameParts.Skip(lastNamePartCount)
                .Select(part => StripCommas(part))
                .Where(part => part.Length > 0)
                .ToList();

            if (givenNames.Count > 0)
            {
                fullNameDto.FirstName = givenNames[0];
                fullNameDto.MiddleName = string.Join(" ", givenNames.Skip(1));
            }

            return fullNameDto;
        }

        // "First [Middle...] Last" with three or more parts.
        if (nameParts.Count >= 3)
        {
            fullNameDto.FirstName = StripCommas(nameParts[0]);
            nameParts.RemoveAt(0);

            var lastIndex = nameParts.Count - 1;
            if (LastNamePrefixes.Contains(ToLookupKey(nameParts[lastIndex - 1])))
            {
                // The part before the final one is a surname particle: keep both together.
                fullNameDto.LastName = $"{nameParts[lastIndex - 1]} {StripCommas(nameParts[lastIndex])}";
                nameParts.RemoveRange(lastIndex - 1, 2);
            }
            else
            {
                fullNameDto.LastName = StripCommas(nameParts[lastIndex]);
                nameParts.RemoveAt(lastIndex);
            }

            // Whatever is left between first and last name is the middle name.
            fullNameDto.MiddleName = string.Join(" ", nameParts).Trim();
            return fullNameDto;
        }

        // Exactly two parts: either a particle + surname ("da Vinci") or "First Last".
        if (LastNamePrefixes.Contains(ToLookupKey(nameParts[0])))
        {
            fullNameDto.LastName = $"{StripCommas(nameParts[0])} {StripCommas(nameParts[1])}";
        }
        else
        {
            fullNameDto.FirstName = StripCommas(nameParts[0]);
            fullNameDto.LastName = StripCommas(nameParts[1]);
        }

        return fullNameDto;
    }

    // Removes every comma from a name part and trims surrounding whitespace.
    private static string StripCommas(string part) => part.Replace(",", string.Empty).Trim();

    // Normalises a part for lookup in the known-word lists: drops '.' and ',', trims,
    // and lower-cases with the invariant culture so matching does not depend on the
    // current thread culture (e.g. Turkish casing of 'I').
    private static string ToLookupKey(string part) =>
        part.Replace(".", string.Empty)
            .Replace(",", string.Empty)
            .Trim()
            .ToLowerInvariant();
}
这段代码可以处理相当多的不同场景;为了确保这一点,我(迄今为止)已经针对它编写了 50 多个不同的单元测试。
再次感谢 @eselk,正是他的思路帮助我在其出色方案的基础上写出了这个扩展版本。另外,作为额外的收获,这段代码还能处理名字就叫“JR”的人这种少见的情况。