2

我通过在 Visual Studio 2010 中将我自己的 C++ 项目编译为 DLL 将 CMU 的 PocketSphinx 与 Unity 集成,我从 Unity Pro 中的 C# 脚本调用它。我知道 dll 可以工作,因为我已经使用相同的代码将另一个项目作为 exe 进行了编译,并且它可以作为独立程序完美运行。我正在使用 pocketsphinx_continuous 项目示例,它获取麦克风输入并将文本输出到控制台。我已自定义此代码以从 Unity 内部调用,它应该作为字符串而不是控制台输出回我的 C# 代码。我觉得我几乎可以正常工作,但是 const char * 只是没有将其作为字符串返回。如果我使用这个声明,我最终会遇到访问冲突错误:

private static extern string recognize_from_microphone();

所以,我尝试使用这个:

private static extern IntPtr recognize_from_microphone();

然后,我使用这行代码尝试打印该函数的输出:

print("you just said " + Marshal.PtrToStringAnsi(recognize_from_microphone()));

但是,这样我得到的输出只有 "you just said ",后面什么也没有。如果我改成下面这样,倒是可以拿到一个内存地址: print("you just said " + recognize_from_microphone()); 所以,我知道确实有东西被返回了。

这是我的 C++ 代码(其中大部分最初是用 C 编写的,作为来自 pocketsphinx 的示例代码):

/*
 * Returns a newly malloc'ed, NUL-terminated duplicate of str.
 *
 * str: string to duplicate; may be NULL.
 *
 * Returns NULL when str is NULL or when the allocation fails. Otherwise the
 * caller owns the returned buffer and must release it with free().
 */
char* MakeStringCopy (const char* str)
{
  if (str == NULL) return NULL;

  /* Measure once; the size includes the terminating NUL. */
  size_t size = strlen(str) + 1;
  char* res = (char*)malloc(size);

  /* Never write through a failed allocation. */
  if (res == NULL) return NULL;

  memcpy(res, str, size);
  return res;
}


/*
 * Listens on the audio device for one utterance, decodes it with the
 * decoder stored in the file-level `ps`, and returns the hypothesis text.
 *
 * Return value:
 *   - on success, a heap copy of the hypothesis made by MakeStringCopy()
 *     (malloc); nothing in this function frees it, so the caller owns it;
 *   - NULL if that copy could not be allocated;
 *   - the string literal "config is null" or "ps is null" when the
 *     corresponding initialisation call returns NULL;
 *   - the string literal "nothing returned" when the decoder produced no
 *     hypothesis for the utterance.
 *
 * Audio-device and voice-activity-detection failures are reported through
 * E_FATAL, exactly as in the pocketsphinx_continuous sample this is based on.
 *
 * The hypothesis is copied BEFORE ps_free() is called: the pointer returned
 * by ps_get_hyp() refers to storage belonging to the decoder (see the
 * PocketSphinx API documentation), so it must not be read after the decoder
 * has been released.
 */
extern __declspec(dllexport) const char * recognize_from_microphone()
{
    ad_rec_t *ad;
    cont_ad_t *cont;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    const char *result;

    //this is a near complete duplication of the code from main()
    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "\\hmm\\en_US\\hub4wsj_sc_8k",
                         "-lm", MODELDIR "\\lm\\en\\turtle.DMP",
                         "-dict", MODELDIR "\\lm\\en\\turtle.dic",
                         NULL);
    if (config == NULL)
    {
        return "config is null";
    }

    ps = ps_init(config);
    if (ps == NULL)
    {
        return "ps is null";
    }

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");

    /*
     * Exactly one utterance is handled per call: whether or not a hypothesis
     * comes back, the function returns afterwards, so no outer "next
     * utterance" loop is needed.
     */
    fflush(stdout);
    fflush(stderr);

    /* Wait data for the utterance */
    while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
        sleep_msec(100);

    if (k < 0)
        E_FATAL("Failed to read audio\n");

    /*
     * Non-zero amount of data received; start recognition of new utterance.
     * NULL argument => automatic generation of utterance-id.
     */
    if (ps_start_utt(ps, NULL) < 0)
        E_FATAL("Failed to start utterance\n");

    ps_process_raw(ps, adbuf, k, FALSE, FALSE);
    fflush(stdout);

    /* Note timestamp for this first block of data */
    ts = cont->read_ts;

    /* Decode utterance until end (marked by a "long" silence, >1sec) */
    for (;;) {

        /* Read non-silence audio data, if any, from continuous listening module */
        if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
            E_FATAL("Failed to read audio\n");
        if (k == 0) {
            /*
             * No speech data available; check current timestamp with most recent
             * speech to see if more than 1 sec elapsed.  If so, end of utterance.
             */
            if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                break;
        }
        else {
            /* New speech data received; note current timestamp */
            ts = cont->read_ts;
        }

        /*
         * Decode whatever data was read above.
         */
        rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        /* If no work to be done, sleep a bit */
        if ((rem == 0) && (k == 0))
            sleep_msec(20);
    }

    /*
     * Utterance ended; flush any accumulated, unprocessed A/D data and stop
     * listening until current utterance completely decoded
     */
    ad_stop_rec(ad);
    while (ad_read(ad, adbuf, 4096) >= 0);
    cont_ad_reset(cont);
    fflush(stdout);

    /* Finish decoding and obtain the result */
    ps_end_utt(ps);
    hyp = ps_get_hyp(ps, NULL, &uttid);
    fflush(stdout);

    /*
     * Take the copy while the decoder is still alive; hyp must not be
     * dereferenced once ps_free() has run.
     */
    if (hyp)
        result = MakeStringCopy(hyp);
    else
        result = "nothing returned";

    /* Release audio, VAD and decoder on every path, not only on success. */
    cont_ad_close(cont);
    ad_close(ad);
    ps_free(ps);
    ps = NULL;  /* do not leave the file-level handle dangling */

    return result;
}

如果我把 return temp; 改成 return "some string here";,就能看到这段文本出现在 Unity 中。不过,这并没有什么帮助,因为我需要的不是硬编码的文本,而是语音识别代码的输出,它最终存储在 hyp 变量中。

谁能帮我弄清楚我做错了什么?谢谢!

4

2 回答 2

3

问题是,你不应该在 C++ 中分配原始内存,然后以这种方式在 C# 中使用它:在 MakeStringCopy 函数内部分配的那块内存,该由谁来释放?

尝试这样的事情:

// P/Invoke declaration for a native function exported by MyLibrary.dll.
[DllImport("MyLibrary.dll")]
// Marshal the native return value as an ANSI (LPStr) C string into a managed string.
[return: MarshalAs(UnmanagedType.LPStr)] 
public static extern string GetStringValue();

这样,您就可以告诉封送处理程序 CLR 拥有调用该函数所产生的内存,并且它将负责解除分配。

此外,.Net 字符串包含 unicode 字符,这就是为什么您在尝试为其分配 ANSI 字符时遇到访问冲突错误的原因。使用该属性UnmanagedType.LPStr还可以告诉编组器它应该期望的字符类型,以便它可以为您进行转换。

最后,对于 C++ 端的内存分配,根据 MSDN 中的这个示例,您应该在 MakeStringCopy 函数内部使用 CoTaskMemAlloc,而不是 malloc。

于 2012-10-23T01:57:53.203 回答
2

终于搞定了!我最终不得不将一个 stringbuilder 对象传递给 C++ 函数并从 C# 中的该对象中获取字符串,就像我在这篇文章中发现的那样: http ://www.pcreview.co.uk/forums/passing-and-retrieving-字符串调用-c-function-c-t1367069.html

该代码比我想要的要慢,但至少现在可以使用。这是我的最终代码:

C#:

// Import the native function from the pocketsphinx_unity library using the
// cdecl calling convention and ANSI string marshalling. The StringBuilder is
// the buffer the native side writes its result into; it is created below
// with a capacity of 1000 characters.
[DllImport ("pocketsphinx_unity",CallingConvention=CallingConvention.Cdecl,CharSet = CharSet.Ansi)]
private static extern void recognize_from_microphone(StringBuilder str);StringBuilder mytext= new StringBuilder(1000);
// Run the native recognizer, then print whatever text it left in the buffer.
recognize_from_microphone(mytext);
print("you just said " + mytext.ToString());

C++:

/*
 * Listens on the audio device until the decoder produces a hypothesis for an
 * utterance, then copies that hypothesis into the caller's buffer.
 *
 * fromUnity: caller-allocated output buffer (the C# side passes a
 *            StringBuilder). It is set to the empty string on entry, so it
 *            holds "" whenever the function returns without a hypothesis.
 *            A NULL pointer makes the function return immediately.
 *
 * NOTE(review): the buffer's capacity is not passed in, so the final copy
 * cannot be bounded here; the caller must supply a buffer large enough for
 * the longest possible hypothesis. Adding a size parameter would change the
 * exported signature, so that is left to the caller.
 *
 * On any initialisation, recording or read failure a message is printed to
 * stdout, everything that was acquired is released, and the function returns
 * instead of carrying on with an invalid handle or a negative sample count.
 */
extern __declspec(dllexport) void recognize_from_microphone(char * fromUnity){
    /*
     * All locals are declared up front so that the `goto cleanup` jumps below
     * never cross an initialisation.
     */
    ps_decoder_t *ps = NULL;
    cmd_ln_t *config = NULL;
    ad_rec_t *ad = NULL;
    cont_ad_t *cont = NULL;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    int recording = 0;  /* non-zero while ad_start_rec() is in effect */

    if (fromUnity == NULL)
        return;
    fromUnity[0] = '\0';

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", MODELDIR "\\hmm\\en_US\\hub4wsj_sc_8k",
                         "-lm", MODELDIR "\\lm\\en\\turtle.DMP",
                         "-dict", MODELDIR "\\lm\\en\\turtle.dic",
                         NULL);
    if (config == NULL)
    {
        printf("config is null\n");
        return;
    }

    ps = ps_init(config);
    if (ps == NULL)
    {
        printf("ps is null\n");
        return;
    }

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL) {
        printf("Failed to open audio device\n");
        goto cleanup;
    }

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL) {
        printf("Failed to initialize voice activity detection\n");
        goto cleanup;
    }
    if (ad_start_rec(ad) < 0) {
        printf("Failed to start recording\n");
        goto cleanup;
    }
    recording = 1;
    if (cont_ad_calib(cont) < 0) {
        printf("Failed to calibrate voice activity detection\n");
        goto cleanup;
    }

    for (;;) {
        /* Indicate listening for next utterance */
        fflush(stdout);
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0) {
            printf("Failed to read audio\n");
            goto cleanup;
        }

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0) {
            printf("Failed to start utterance\n");
            goto cleanup;
        }

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        fflush(stdout);

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {

            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0) {
                printf("Failed to read audio 2nd\n");
                goto cleanup;
            }
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        recording = 0;
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);
        fflush(stdout);

        /* Finish decoding and obtain the result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, NULL, &uttid);
        fflush(stdout);

        /*
         * For Unity, stop as soon as anything at all was recognised. The copy
         * is made here, while the decoder that hyp points into still exists.
         */
        if (hyp) {
            strcpy(fromUnity, hyp);
            break;
        }

        /* Nothing recognised: resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0) {
            printf("Failed to start recording\n");
            goto cleanup;
        }
        recording = 1;
    }

cleanup:
    /* Release only what was actually acquired. */
    if (ad != NULL && recording)
        ad_stop_rec(ad);
    if (cont != NULL)
        cont_ad_close(cont);
    if (ad != NULL)
        ad_close(ad);
    ps_free(ps);
}
于 2012-10-24T05:10:25.420 回答