1

亲爱的 stackoverflow 社区,

我目前正在研究一个本身调用外部 C++ 代码的 MEX 函数。我在 matlab 的双循环中调用我的 MEX 函数(48 x 48 次),创建一个相似度矩阵。相似度是通过前面提到的 MEX 函数计算的。

/*
 * Matlab interface function.
 */
void mexFunction(int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[]) {
    if (nrhs < 2) {
        //we need exactly two input strings.
        mexErrMsgTxt("Two input strings are needed!");
        return;
    }
    if (nrhs > 5) {
        mexErrMsgTxt("Maximum number of inputs is 5!");
        return;
    }
    if (nrhs == 3 || nrhs == 4) {
        mexErrMsgTxt("You are expected to give all three score vectors: for meta, middle and nodes score.");
        return;
    }
    if (nlhs != 1) {
        //and we only give one output argument
        mexErrMsgTxt("The fragment distance only provides one output argument!");
        return;
    }
    //if possible get score vectors
    if (nrhs == 5) {
        extractScoreVector(prhs, 2, META_SCORENUM, meta_scores);
        extractScoreVector(prhs, 3, MIDDLE_SCORENUM, middle_scores);
        extractScoreVector(prhs, 4, NODES_SCORENUM, nodes_scores);
    } else {
        //otherwise take default scores
        meta_scores[meta_del] = -1;
        meta_scores[meta_ins] = -1;
        meta_scores[meta_match] = 10;
        meta_scores[meta_mismatch] = -2;
        middle_scores[0] = -6;
        middle_scores[1] = -6;
        nodes_scores[nodes_del] = -18;
        nodes_scores[nodes_ins] = -18;
        nodes_scores[nodes_skipl] = 0;
        nodes_scores[nodes_skipr] = 0;
    }

    //get both string inputs.

    std::string firstSeq = getMatlabString(prhs, 0);
    std::string sndSeq = getMatlabString(prhs, 1);

    //split them into node encodings.
    firstNodes = split(firstSeq, '|');
    sndNodes = split(sndSeq, '|');
    //initialize distance table.
    distanceTable = (int**) malloc(sizeof (int *) * firstNodes.size());
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        distanceTable[i] = (int*) malloc(sizeof (int) * sndNodes.size());
        for (unsigned int j = 0; j < sndNodes.size(); j++) {
            distanceTable[i][j] = -1;
        }
    }

    //construct input for nodes alignment: nodes are only represented by index with normed length to 3 (instead of index 1 we append 001).
    std::stringstream nodesInput;

    //first the node indices of the first fragment.
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        int magnitude = getMagnitude(i);
        for (int j = 0; j < 3 - magnitude; j++) {
            nodesInput << '0';
        }
        nodesInput << i << '|';
    }
    //then an @
    nodesInput << '@';
    //then the reversed indices of the second fragment with normed length to 3 (instead of index 1 we append 001).
    for (int i = sndNodes.size() - 1; i >= 0; i--) {
        int magnitude = getMagnitude(i);
        for (int j = 0; j < 3 - magnitude; j++) {
            nodesInput << '0';
        }
        nodesInput << i << '|';
    }
    nodesInput << '\0';

    //call nodes alignment.

    char* nodes_argv[2];
    //fake program name, dummy string
    nodes_argv[0] = (char*) "nodes";
    //actual input. The stringstream string has to be bound to a constant string
    //reference in order to prevent damage to the string behind it. a string stream
    //usually only protects its memory until the string is first evaluated.
    //this special construct prevents the memory from being overwritten.
    const std::string& tmp = nodesInput.str();
    nodes_argv[1] = const_cast<char*> (tmp.c_str());

    //call nodes alignment.
    gapc::Opts opts;
    try {
        //parse inputs
        opts.parse(2, nodes_argv);
    } catch (std::exception &e) {
        std::cerr << "Exception: " << e.what() << '\n';
        std::exit(1);
    }
    nodes obj;

    try {
        obj.init(opts);
    } catch (std::exception &e) {
        std::cerr << "Exception: " << e.what() << '\n';
        std::exit(1);
    }

    obj.cyk();

    gapc::return_type res = obj.run();

    //free distance table memory.
    for (unsigned int i = 0; i < firstNodes.size(); i++) {
        free(distanceTable[i]);
    }
    free(distanceTable);

    //clear the node vectors
    firstNodes.clear();
    sndNodes.clear();

    //Version for simple score return value
    //plhs[0] = mxCreateDoubleScalar(res);

    //Version for string return value
    std::stringstream nodeOutput;
    obj.print_result(nodeOutput, res);
    const std::string& outStr = nodeOutput.str();
    plhs[0] = mxCreateString(outStr.c_str());
}

外部代码指的是 gapc::Opts 和 nodes obj 这两部分。到目前为止,外部代码没有已知的内存泄漏问题,所以我猜测问题出在我贴在这里的代码中。不幸的是,我没能找到错误所在。我曾尝试手动释放代码中出现的各个变量,但这总是导致 MATLAB 崩溃(据我观察,MATLAB 会自行尝试释放这些变量,如果它们已不在内存中就会崩溃)。

内存泄漏在这里很关键:在循环中大约 7 步之后,已经占用了大约 1 GB RAM,在我的测试用例中它上升到大约 13 GB RAM。这对于程序来说是不合理的,因此内存泄漏似乎很可能。

我还尝试在 stackoverflow 中找到修复程序,但这里提到的所有内容似乎都不适用于我的场景。

由于内存泄漏量非常大,最可疑的变量(因为它们存放的内容最多)是 firstSeq、sndSeq、firstNodes、sndNodes、distanceTable、opts 和 obj。

所以我的问题是:

  1. 我是否忘记释放其中一个变量?
  2. 您是否看到其他可能导致代码中内存泄漏的内容?
  3. 我该如何解决?

就我的研究而言,对象不必被释放,因为它们是自动管理的。仍然:某处内存必须泄漏。

/编辑

根据要求,我还提供了辅助函数的代码。请注意,函数“nodes_score”、“meta_score”和“node_distance”是由外部代码调用的,而这些外部代码是我在代码中通过 obj.run() 调用的。

//using namespace std;

/*
 * String tokenizer, taken from
 *
 * http://stackoverflow.com/questions/236129/splitting-a-string-in-c
 *
 * The three-argument overload appends every delim-separated piece of s to
 * elems and returns elems itself. The two-argument overload returns the
 * pieces in a new vector. Empty pieces between two delimiters are kept; an
 * empty input yields no pieces and a trailing delimiter adds no empty piece.
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream input(s);
    for (std::string token; std::getline(input, token, delim);) {
        elems.push_back(token);
    }
    return elems;
}

std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> tokens;
    split(s, delim, tokens);
    return tokens;
}

//These vectors are global and contain the string encoding of the nodes for
//each fragment. mexFunction fills them by splitting the two input strings at
//'|' and clears them again before it returns; node_distance reads them.
std::vector<std::string> firstNodes;
std::vector<std::string> sndNodes;
//this table contains the node distances for each combination of nodes,
//indexed as distanceTable[index into firstNodes][index into sndNodes].
//mexFunction allocates it with malloc and fills it with -1; node_distance
//treats -1 as "not calculated yet" and stores each score it computes.
int** distanceTable;

/*
 * Parses the encoding of one node into a map from meta information index to
 * meta information content.
 *
 * nodeStr is a list of entries separated by '*'. Each entry has the form
 * "<index>#<content>": everything before the first '#' is read as the
 * integer index, everything after it is the content. Only the first '#'
 * separates index and content, so a '#' inside the content is kept as part
 * of the content. Entries without any '#' are skipped. If the text in front
 * of the '#' does not start with a number the entry is stored under index 0.
 * When an index occurs more than once the last entry wins.
 */
std::map<int, std::string> constructMetaMap(std::string nodeStr) {
    //the map mapping meta information indices to the respective meta information content.
    std::map<int, std::string> metaMap;

    //get the single meta information strings
    const std::vector<std::string> metaInfoStrs = split(nodeStr, '*');

    for (std::vector<std::string>::const_iterator metaInfoStr = metaInfoStrs.begin(); metaInfoStr != metaInfoStrs.end(); ++metaInfoStr) {
        const std::string::size_type hashPos = metaInfoStr->find('#');
        if (hashPos == std::string::npos) {
            //no separator, so there is nothing to store for this entry.
            continue;
        }
        //parse the index in front of the separator. metaIdx starts at 0 so
        //that a failed extraction never leaves it uninitialized.
        std::stringstream idxStream(metaInfoStr->substr(0, hashPos));
        int metaIdx = 0;
        idxStream >> metaIdx;
        metaMap[metaIdx] = metaInfoStr->substr(hashPos + 1);
    }
    return metaMap;
}

//Number of entries in middle_scores.
const int MIDDLE_SCORENUM = 2;

//Scores that node_distance adds for meta information present in only one of
//the two compared nodes: entry 0 for an index that only the first node has,
//entry 1 for an index that only the second node has. mexFunction sets both
//to -6 unless score vectors are passed in.
int middle_scores[MIDDLE_SCORENUM];

/*
 * Emulates a call to meta alignment.
 *
 * Scores the node firstNodes[firstNodeIndex] against the node
 * sndNodes[sndNodeIndex]. Both encodings are turned into maps from meta
 * information index to content. For every index present in both maps the
 * two contents are aligned with the meta alignment and the resulting score
 * is added; an index present only in the first node adds middle_scores[0],
 * an index present only in the second node adds middle_scores[1].
 *
 * Results are cached in distanceTable. A cell holding -1 counts as empty, so
 * a pair whose score is exactly -1 is calculated again on every call.
 *
 * NOTE(review): a failure in opts.parse or obj.init ends in std::exit(1),
 * which terminates the whole process this code is loaded into.
 */
int node_distance(unsigned int firstNodeIndex, unsigned int sndNodeIndex) {
    typedef std::map<int, std::string> MetaMap;

    //reuse the stored score if this pair was already calculated.
    const int cachedScore = distanceTable[firstNodeIndex][sndNodeIndex];
    if (cachedScore != -1) {
        return cachedScore;
    }

    //keyword index -> meta information content, once per node.
    const MetaMap lhsMeta = constructMetaMap(firstNodes[firstNodeIndex]);
    const MetaMap rhsMeta = constructMetaMap(sndNodes[sndNodeIndex]);

    int total = 0;

    //pass 1: every index of the first node.
    for (MetaMap::const_iterator lhs = lhsMeta.begin(); lhs != lhsMeta.end(); ++lhs) {
        const MetaMap::const_iterator rhs = rhsMeta.find(lhs->first);
        if (rhs == rhsMeta.end()) {
            //the second node has nothing for this index, punish that.
            total += middle_scores[0];
            continue;
        }

        //the alignment input is "<first content>@<reversed second content>"
        //followed by a NUL character.
        const std::string reversedRhs(rhs->second.rbegin(), rhs->second.rend());
        std::stringstream metaInput;
        metaInput << lhs->second << '@' << reversedRhs << '\0';

        //own copy of the stream content; it outlives every use of argv below.
        const std::string alignmentInput = metaInput.str();
        char* argv[2];
        //fake program name, dummy string
        argv[0] = const_cast<char*> ("meta");
        argv[1] = const_cast<char*> (alignmentInput.c_str());

        //call meta alignment.
        gapc::Opts opts;
        try {
            opts.parse(2, argv);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }

        meta obj;
        try {
            obj.init(opts);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }
        gapc::add_event("start");

        obj.cyk();

        //convert to int before adding, exactly like the running total.
        const int metaScore = obj.run();
        total += metaScore;
    }

    //pass 2: every index that only the second node has.
    for (MetaMap::const_iterator rhs = rhsMeta.begin(); rhs != rhsMeta.end(); ++rhs) {
        if (lhsMeta.find(rhs->first) == lhsMeta.end()) {
            total += middle_scores[1];
        }
    }

    //remember the result for later calls with the same pair.
    distanceTable[firstNodeIndex][sndNodeIndex] = total;
    return total;
}

//Number of entries in meta_scores and nodes_scores. mexFunction requires the
//score vectors passed in from MATLAB to have exactly these lengths.
//NOTE(review): the default branch of mexFunction assigns only four meta
//scores (meta_del, meta_ins, meta_match, meta_mismatch) although
//META_SCORENUM is 6 - confirm what the remaining two entries stand for.
const int META_SCORENUM = 6;
const int NODES_SCORENUM = 4;

//Score tables read by meta_score and nodes_score; the operation type is used
//as the index into the respective array.
int meta_scores[META_SCORENUM];
int nodes_scores[NODES_SCORENUM];

/*
 * Looks up the score of a meta alignment operation.
 *
 * The operation type is converted to int and used as index into the global
 * meta_scores array. No range check is made.
 */
int meta_score(meta_score_type type) {
    const int slot = static_cast<int> (type);
    return meta_scores[slot];
}

/*
 * Looks up the score of a nodes alignment operation.
 *
 * The operation type is converted to int and used as index into the global
 * nodes_scores array. No range check is made.
 */
int nodes_score(nodes_score_type type) {
    const int slot = static_cast<int> (type);
    return nodes_scores[slot];
}

// Utility function for extracting string inputs

std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    const mxArray *strData = prhs[strIndex];
    int strLength = mxGetN(prhs[strIndex]) + 1;
    char *buf = mxArrayToString(strData);
    std::string s(buf);
    mxFree(buf);
    return s;
}

//Utility function for extracting the score vector.

void extractScoreVector(const mxArray *prhs[], int vecIdx, int scorelength, int scoreVec[]) {
    //Declarations
    const mxArray *vecData;
    double *singleVals;
    int rowLen, colLen;

    //Copy input pointer
    vecData = prhs[vecIdx];

    //Get matrix
    singleVals = (double*) mxGetPr(vecData);
    rowLen = mxGetN(vecData);
    colLen = mxGetM(vecData);

    //we don't care if it is a column or row vector but it has to be a
    //SCORENUM x 1 vector.
    if ((rowLen == 1 && colLen == scorelength) || (rowLen == scorelength && colLen == 1)) {
        for (int i = 0; i < scorelength; i++) {
            scoreVec[i] = (int) singleVals[i];
        }
    } else {
        mexErrMsgTxt("The score vector has the wrong number of entries!");
    }
}

/*
 * Returns the number of decimal digits of number: 1 for 0 to 9, 2 for
 * 10 to 99 and so on. Negative numbers yield 0.
 */
int getMagnitude(int number) {
    if (number < 0) {
        return 0;
    }
    //every non-negative number has at least one digit; each further
    //division by ten that leaves something over adds one more.
    int digits = 1;
    for (int rest = number / 10; rest > 0; rest /= 10) {
        ++digits;
    }
    return digits;
}
4

1 回答 1

2

您在以下两个语句中都泄漏了内存:

//get both string inputs. Each std::string is constructed from, and therefore
//copies, whatever getMatlabString returns for the given input index.
std::string firstSeq(getMatlabString(prhs, 0));
std::string sndSeq(getMatlabString(prhs, 1));

getMatlabString 会为字符串分配内存,并返回一个指向这块内存的指针。您复制了该指针所指向的字符串内容,但之后从未释放这块内存。这也解释了为什么 clear mex_filename; 能解决这个问题:当 MEX 文件被卸载时,MATLAB 会自动释放通过 mxMalloc 及其同类函数分配的内存。我会把 getMatlabString 函数重写为:

std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    const mxArray *strData = prhs[strIndex];
    int strLength = mxGetN(prhs[strIndex]) + 1;
    std::unique_ptr<char[], void(*)(char *)> 
      buf( static_cast<char *>(mxCalloc(strLength, sizeof(char))),
           []( char *p ) { mxFree(p); } );

    mxGetString(strData, buf.get(), strLength);
    return std::string(buf.get());
}

如果你的编译器不支持 unique_ptr 和 lambda 表达式,可以改用 vector<char> 来代替这一部分。

此外,正如我在评论中提到的,我会把 distanceTable 改为 vector<vector<int>> distanceTable;。调用 vector::resize() 将其大小设置为所需的值;如果该变量必须是全局的,则在使用完毕后将它与一个临时向量交换,以释放内存。

// Swap the global table with an empty temporary vector. The temporary takes
// over the old contents and frees them when it is destroyed at the end of
// this statement, which leaves distanceTable empty.
std::vector<std::vector<int>>().swap(distanceTable);

使用 vector<char> 代替上面的 unique_ptr 的版本:

std::string getMatlabString(const mxArray *prhs[], int strIndex) {
    const mxArray *strData = prhs[strIndex];
    int strLength = mxGetN(prhs[strIndex]) + 1;
    std::vector<char> buf( strLength, 0 );

    mxGetString(strData, &buf[0], strLength);
    return std::string(&buf[0]);
}
于 2013-09-18T16:25:17.977 回答