I have an application that needs to accept millions of char* as input parameters (usually strings of fewer than 512 characters, in Unicode) and convert and store them as .NET strings.
This has turned out to be a real bottleneck in my application's performance, and I'm wondering whether there is some design pattern or idea that would make it more efficient.
There is one key aspect that makes me think this can be improved: there is a lot of repetition. Say a million objects come in, there may only be about 50 unique char* patterns (a sketch of what I mean appears after the code below).
For the record, here is the algorithm I use to convert the char* to a String (this algorithm is in C++, but the rest of the project is in C#):
String ^StringTools::MbCharToStr ( const char *Source )
{
    String ^str;

    if( (Source == NULL) || (Source[0] == '\0') )
    {
        str = gcnew String("");
    }
    else
    {
        // Find the number of UTF-16 characters needed to hold the
        // converted UTF-8 string, and allocate a buffer for them.
        const size_t max_strsize = 2048;

        int wstr_size = MultiByteToWideChar (CP_UTF8, 0L, Source, -1, NULL, 0);
        if (wstr_size < max_strsize)
        {
            // Save the malloc/free overhead if it's a reasonable size.
            // Plus, KJN was having fits with exceptions within exception logging due
            // to a corrupted heap.
            wchar_t wstr[max_strsize];
            (void) MultiByteToWideChar (CP_UTF8, 0L, Source, -1, wstr, (int) wstr_size);
            str = gcnew String (wstr);
        }
        else
        {
            wchar_t *wstr = (wchar_t *)calloc (wstr_size, sizeof(wchar_t));
            if (wstr == NULL)
                throw gcnew PCSException (__FILE__, __LINE__, PCS_INSUF_MEMORY, MSG_SEVERE);

            // Convert the UTF-8 string into the UTF-16 buffer, construct the
            // result String from the UTF-16 buffer, and then free the buffer.
            (void) MultiByteToWideChar (CP_UTF8, 0L, Source, -1, wstr, (int) wstr_size);
            str = gcnew String ( wstr );
            free (wstr);
        }
    }
    return str;
}
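
The direction I keep coming back to is exploiting that repetition: hash on the raw UTF-8 bytes and reuse the String^ that was already built for them. Below is a minimal sketch of what I mean; the cache g_strCache, the wrapper MbCharToStrCached, and the assumption that cached strings can live for the rest of the process are purely illustrative (and it is not thread-safe as written):

    // Illustrative only: memoize conversions keyed on the raw UTF-8 bytes.
    #include <string>
    #include <unordered_map>
    #include <msclr/gcroot.h>

    using namespace System;

    // Maps the incoming bytes to the String^ already built for them.
    // NOTE: not thread-safe; concurrent callers would need a lock around it.
    static std::unordered_map<std::string, msclr::gcroot<String ^>> g_strCache;

    // Hypothetical wrapper (would need a matching declaration in StringTools).
    String ^StringTools::MbCharToStrCached ( const char *Source )
    {
        if ( (Source == NULL) || (Source[0] == '\0') )
            return String::Empty;

        std::string key ( Source );             // hash/compare on the raw bytes
        auto it = g_strCache.find ( key );
        if ( it != g_strCache.end() )
            return it->second;                  // reuse the previously converted String

        String ^str = MbCharToStr ( Source );   // fall back to the existing conversion
        g_strCache.emplace ( std::move(key), msclr::gcroot<String ^>(str) );
        return str;
    }

With only ~50 unique patterns the map stays tiny, so each repeated input costs one hash plus one byte comparison instead of a full MultiByteToWideChar pass and a new String allocation. Is something along these lines reasonable, or is there a better-known pattern for this?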