亲爱的 stackoverflow 社区,
我目前正在编写一个 MEX 函数,它内部会调用外部的 C++ 代码。我在 MATLAB 的双重循环中调用这个 MEX 函数(48 x 48 次)来构建一个相似度矩阵,其中每个相似度都由上述 MEX 函数计算。运行过程中内存占用不断增长,我怀疑存在内存泄漏。该 MEX 函数的代码如下:
/*
 * Copies `prefix` followed by `message` into `buffer`, truncating so that at
 * most `capacity - 1` characters plus the terminating '\0' are written.
 * `capacity` must be at least 1.
 */
static void storeErrorText(char *buffer, unsigned int capacity,
        const char *prefix, const char *message) {
    unsigned int used = 0;
    for (const char *p = prefix; *p != '\0' && used + 1 < capacity; ++p) {
        buffer[used++] = *p;
    }
    for (const char *p = message; *p != '\0' && used + 1 < capacity; ++p) {
        buffer[used++] = *p;
    }
    buffer[used] = '\0';
}

/*
 * Appends `index` to `out`, left-padded with '0' to at least three digits and
 * followed by the '|' separator (index 1 is written as "001|").
 */
static void appendPaddedIndex(std::stringstream &out, int index) {
    const int magnitude = getMagnitude(index);
    for (int j = 0; j < 3 - magnitude; j++) {
        out << '0';
    }
    out << index << '|';
}

/*
 * Allocates the global distanceTable as a rows x cols table with every cell
 * set to -1 (the "not yet computed" marker checked by node_distance).
 *
 * Returns false if an allocation fails; in that case everything allocated so
 * far has been released again and distanceTable is a null pointer.
 */
static bool allocateDistanceTable(unsigned int rows, unsigned int cols) {
    distanceTable = (int**) malloc(sizeof (int *) * rows);
    if (distanceTable == 0) {
        //malloc may legitimately return a null pointer for a zero-sized request.
        return rows == 0;
    }
    for (unsigned int i = 0; i < rows; i++) {
        distanceTable[i] = (int*) malloc(sizeof (int) * cols);
        if (distanceTable[i] == 0 && cols != 0) {
            for (unsigned int k = 0; k < i; k++) {
                free(distanceTable[k]);
            }
            free(distanceTable);
            distanceTable = 0;
            return false;
        }
        for (unsigned int j = 0; j < cols; j++) {
            distanceTable[i][j] = -1;
        }
    }
    return true;
}

/*
 * Frees the `rows` rows of the global distanceTable and the table itself, then
 * resets the pointer so that no dangling pointer survives the call.
 */
static void releaseDistanceTable(unsigned int rows) {
    if (distanceTable != 0) {
        for (unsigned int i = 0; i < rows; i++) {
            free(distanceTable[i]);
        }
    }
    free(distanceTable);
    distanceTable = 0;
}

/*
 * Matlab interface function.
 *
 * Inputs (prhs):
 *   0, 1 - the two fragment strings; their nodes are separated by '|'.
 *   2..4 - optional score vectors for meta, middle and nodes score. Either
 *          all three are given or none, in which case defaults are used.
 * Output (plhs):
 *   0    - the text written by print_result of the nodes alignment.
 *
 * Failures of the alignment are reported as MATLAB errors. The distance table
 * and the global node vectors are released on the success path and on the
 * error path alike.
 */
void mexFunction(int nlhs, mxArray *plhs[],
        int nrhs, const mxArray *prhs[]) {
    if (nrhs < 2) {
        //we need at least the two input strings.
        mexErrMsgTxt("Two input strings are needed!");
        return;
    }
    if (nrhs > 5) {
        mexErrMsgTxt("Maximum number of inputs is 5!");
        return;
    }
    if (nrhs == 3 || nrhs == 4) {
        mexErrMsgTxt("You are expected to give all three score vectors: for meta, middle and nodes score.");
        return;
    }
    if (nlhs != 1) {
        //and we only give one output argument
        mexErrMsgTxt("The fragment distance only provides one output argument!");
        return;
    }
    //if possible get score vectors
    if (nrhs == 5) {
        extractScoreVector(prhs, 2, META_SCORENUM, meta_scores);
        extractScoreVector(prhs, 3, MIDDLE_SCORENUM, middle_scores);
        extractScoreVector(prhs, 4, NODES_SCORENUM, nodes_scores);
    } else {
        //otherwise take default scores
        meta_scores[meta_del] = -1;
        meta_scores[meta_ins] = -1;
        meta_scores[meta_match] = 10;
        meta_scores[meta_mismatch] = -2;
        middle_scores[0] = -6;
        middle_scores[1] = -6;
        nodes_scores[nodes_del] = -18;
        nodes_scores[nodes_ins] = -18;
        nodes_scores[nodes_skipl] = 0;
        nodes_scores[nodes_skipr] = 0;
    }

    //mexErrMsgTxt does not return to its caller. Errors are therefore only
    //recorded inside the scope below and raised after that scope has ended,
    //so that no C++ object of this function is alive at that point. The
    //buffer is static so that it does not live on the stack being abandoned.
    static char errorText[512];
    errorText[0] = '\0';
    bool failed = false;
    {
        //get both string inputs.
        std::string firstSeq = getMatlabString(prhs, 0);
        std::string sndSeq = getMatlabString(prhs, 1);
        //split them into node encodings.
        firstNodes = split(firstSeq, '|');
        sndNodes = split(sndSeq, '|');
        const unsigned int firstCount = firstNodes.size();
        const unsigned int sndCount = sndNodes.size();

        //initialize distance table.
        if (!allocateDistanceTable(firstCount, sndCount)) {
            storeErrorText(errorText, sizeof (errorText), "", "Out of memory while allocating the distance table!");
            failed = true;
        } else {
            std::string resultText;
            try {
                //construct input for nodes alignment: nodes are only represented
                //by index with normed length to 3 (instead of index 1 we append 001).
                std::stringstream nodesInput;
                //first the node indices of the first fragment.
                for (unsigned int i = 0; i < firstCount; i++) {
                    appendPaddedIndex(nodesInput, i);
                }
                //then an @
                nodesInput << '@';
                //then the reversed indices of the second fragment.
                for (int i = static_cast<int> (sndCount) - 1; i >= 0; i--) {
                    appendPaddedIndex(nodesInput, i);
                }
                nodesInput << '\0';

                //str() returns a copy; keeping it in a named string keeps the
                //buffer handed to the parser alive for the whole alignment.
                std::string nodesInputStr = nodesInput.str();
                char* nodes_argv[2];
                //fake program name, dummy string
                nodes_argv[0] = (char*) "nodes";
                nodes_argv[1] = const_cast<char*> (nodesInputStr.c_str());

                //call nodes alignment.
                gapc::Opts opts;
                opts.parse(2, nodes_argv);
                nodes obj;
                obj.init(opts);
                obj.cyk();
                gapc::return_type res = obj.run();

                //Version for simple score return value
                //plhs[0] = mxCreateDoubleScalar(res);
                //Version for string return value
                std::stringstream nodeOutput;
                obj.print_result(nodeOutput, res);
                resultText = nodeOutput.str();
            } catch (const std::exception &e) {
                //do not call std::exit here: that would terminate the process
                //hosting this MEX function instead of reporting an error.
                storeErrorText(errorText, sizeof (errorText), "Exception: ", e.what());
                failed = true;
            }
            //free distance table memory, on success and on failure.
            releaseDistanceTable(firstCount);
            if (!failed) {
                plhs[0] = mxCreateString(resultText.c_str());
            }
        }
        //clear the node vectors
        firstNodes.clear();
        sndNodes.clear();
    }
    if (failed) {
        mexErrMsgTxt(errorText);
    }
}
外部代码指的是 gapc::Opts 和 nodes obj 这两部分。据目前所知,外部代码本身没有内存泄漏问题,所以我猜测问题出在我贴出的这段代码里。不幸的是,我一直没能找到错误。我尝试过手动释放代码中出现的各个变量,但这总会导致 MATLAB 崩溃(据我观察,MATLAB 会自行尝试释放这些变量,如果对应的内存已经不存在就会崩溃)。
内存泄漏在这里很关键:在循环中大约 7 步之后,已经占用了大约 1 GB RAM,在我的测试用例中它上升到大约 13 GB RAM。这对于程序来说是不合理的,因此内存泄漏似乎很可能。
我也在 Stack Overflow 上搜索过解决办法,但找到的内容似乎都不适用于我的情况。
由于泄漏的内存量非常大,最可疑的变量(因为它们保存的内容最多)是 firstSeq、sndSeq、firstNodes、sndNodes、distanceTable、opts 和 obj。
所以我的问题是:
- 我是否忘记释放其中一个变量?
- 您是否看到其他可能导致代码中内存泄漏的内容?
- 我该如何解决?
据我了解,这些对象不需要手动释放,因为它们是自动管理的。但内存肯定还是在某个地方泄漏了。
/编辑
根据要求,我还提供了我的辅助函数的代码。请注意,函数“nodes_score”、“meta_score”和“node_distance”是从我使用 obj.run() 在代码中调用的外部函数调用的。
//using namespace std;
/*
 * String splitting helpers, adapted from
 *
 * http://stackoverflow.com/questions/236129/splitting-a-string-in-c
 *
 * This overload appends every piece of `s` delimited by `delim` to `elems`
 * (existing elements are kept) and returns a reference to `elems`.
 * An empty piece at the very end of `s` is not reported, so "a|" yields one
 * element and "" yields none.
 */
std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::istringstream tokens(s);
    for (std::string token; std::getline(tokens, token, delim);) {
        elems.push_back(token);
    }
    return elems;
}

/*
 * Convenience overload: returns the pieces of `s` delimited by `delim` in a
 * new vector, with the same tokenization rules as the overload above.
 */
std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> pieces;
    std::istringstream tokens(s);
    std::string piece;
    while (std::getline(tokens, piece, delim)) {
        pieces.push_back(piece);
    }
    return pieces;
}
//These vectors are global and contain the string encoding of the nodes for
//each fragment. mexFunction fills them by splitting its two input strings at
//'|' and clears them again before it returns; node_distance reads them by
//node index.
std::vector<std::string> firstNodes;
std::vector<std::string> sndNodes;
//this table contains the node distances for each combination of nodes,
//indexed as distanceTable[index into firstNodes][index into sndNodes].
//mexFunction allocates it with malloc, sets every cell to -1 and frees it
//again; node_distance treats -1 as "not computed yet" and stores each score
//it computes here.
int** distanceTable;
/*
 * Parses the encoding of one node into a map from meta information index to
 * meta information content.
 *
 * nodeStr consists of entries separated by '*'. Each entry has the form
 * "<index>#<content>": everything before the first '#' is read as the integer
 * index, everything after it is the content. Only the first '#' separates
 * index and content, so the content itself may contain further '#'
 * characters. Entries without any '#' are ignored. If the text before the
 * '#' does not start with a number the entry is stored under index 0. If the
 * same index occurs more than once the last entry wins.
 */
std::map<int, std::string> constructMetaMap(std::string nodeStr) {
    std::map<int, std::string> metaMap;
    //walk over the '*'-separated entries; like split(), this does not report
    //an empty entry at the very end of the string.
    std::stringstream entries(nodeStr);
    std::string entry;
    while (std::getline(entries, entry, '*')) {
        const std::string::size_type hashPos = entry.find('#');
        if (hashPos == std::string::npos) {
            //no index/content separator: nothing to store.
            continue;
        }
        //initialized so that a failed extraction can never leave an
        //indeterminate value in the key.
        int metaIdx = 0;
        std::stringstream idxStream(entry.substr(0, hashPos));
        idxStream >> metaIdx;
        metaMap[metaIdx] = entry.substr(hashPos + 1);
    }
    return metaMap;
}
//Number of entries in the middle score vector.
const int MIDDLE_SCORENUM = 2;
//Scores used by node_distance when a meta information index exists in only
//one of the two nodes: entry 0 is added for an index present only in the
//first node, entry 1 for an index present only in the second node.
//mexFunction sets both to -6 unless score vectors are passed in.
int middle_scores[MIDDLE_SCORENUM];
/*
 * Emulates a call to meta alignment for one pair of nodes.
 *
 * The score is the sum over all meta information indices of both nodes:
 *  - an index present in both nodes contributes the result of a meta
 *    alignment of the two contents,
 *  - an index present only in the first node contributes middle_scores[0],
 *  - an index present only in the second node contributes middle_scores[1].
 *
 * Results are cached in distanceTable; a cell holding -1 counts as not yet
 * computed.
 *
 * NOTE(review): a failure while parsing or initialising the meta alignment
 * ends in std::exit(1), which terminates the whole process - confirm that
 * this is intended when the code runs inside MATLAB.
 */
int node_distance(unsigned int firstNodeIndex, unsigned int sndNodeIndex) {
    //reuse the stored score if this pair has been handled before.
    const int cachedScore = distanceTable[firstNodeIndex][sndNodeIndex];
    if (cachedScore != -1) {
        return cachedScore;
    }

    typedef std::map<int, std::string> MetaMap;
    //maps of keyword indices to meta information content for both nodes.
    const MetaMap metaOfFirst = constructMetaMap(firstNodes[firstNodeIndex]);
    const MetaMap metaOfSnd = constructMetaMap(sndNodes[sndNodeIndex]);

    int total = 0;
    //pass 1: every index of the first node.
    for (MetaMap::const_iterator entry = metaOfFirst.begin(); entry != metaOfFirst.end(); ++entry) {
        const MetaMap::const_iterator partner = metaOfSnd.find(entry->first);
        if (partner == metaOfSnd.end()) {
            //the second node has no value for this index: punish that.
            total += middle_scores[0];
            continue;
        }

        //alignment input: content of the first node, '@', reversed content of
        //the second node and an explicit '\0' character.
        std::string alignmentInput = entry->second;
        alignmentInput += '@';
        alignmentInput.append(partner->second.rbegin(), partner->second.rend());
        alignmentInput += '\0';

        char* argv[2];
        //fake program name, dummy string
        argv[0] = (char*) "meta";
        //the named string above owns the buffer for as long as the alignment runs.
        argv[1] = const_cast<char*> (alignmentInput.c_str());

        //call meta alignment.
        gapc::Opts opts;
        try {
            opts.parse(2, argv);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }
        meta obj;
        try {
            obj.init(opts);
        } catch (std::exception &e) {
            std::cerr << "Exception: " << e.what() << '\n';
            std::exit(1);
        }
        gapc::add_event("start");
        obj.cyk();
        const int metaScore = obj.run();
        total += metaScore;
    }

    //pass 2: indices that only the second node has.
    for (MetaMap::const_iterator entry = metaOfSnd.begin(); entry != metaOfSnd.end(); ++entry) {
        if (metaOfFirst.find(entry->first) == metaOfFirst.end()) {
            total += middle_scores[1];
        }
    }

    //remember the result for later calls with the same pair.
    distanceTable[firstNodeIndex][sndNodeIndex] = total;
    return total;
}
//Number of entries in the meta score vector and in the nodes score vector.
const int META_SCORENUM = 6;
const int NODES_SCORENUM = 4;
//Score tables read by meta_score() and nodes_score(); the score type passed
//to those functions is used as the array index. mexFunction fills them either
//from the score vectors passed in or with default values.
//NOTE(review): the defaults in mexFunction assign only four meta entries
//(meta_del, meta_ins, meta_match, meta_mismatch) although META_SCORENUM is 6
//- confirm that the remaining two entries are never read in that case.
int meta_scores[META_SCORENUM];
int nodes_scores[NODES_SCORENUM];
/*
 * Looks up the meta alignment score of the given operation: the operation
 * type, converted to int, is the index into meta_scores.
 */
int meta_score(meta_score_type type) {
    const int slot = static_cast<int> (type);
    return meta_scores[slot];
}
/*
 * Looks up the nodes alignment score of the given operation: the operation
 * type, converted to int, is the index into nodes_scores.
 */
int nodes_score(nodes_score_type type) {
    const int slot = static_cast<int> (type);
    return nodes_scores[slot];
}
// Utility function for extracting string inputs
std::string getMatlabString(const mxArray *prhs[], int strIndex) {
const mxArray *strData = prhs[strIndex];
int strLength = mxGetN(prhs[strIndex]) + 1;
char *buf = mxArrayToString(strData);
std::string s(buf);
mxFree(buf);
return s;
}
//Utility function for extracting the score vector.
void extractScoreVector(const mxArray *prhs[], int vecIdx, int scorelength, int scoreVec[]) {
//Declarations
const mxArray *vecData;
double *singleVals;
int rowLen, colLen;
//Copy input pointer
vecData = prhs[vecIdx];
//Get matrix
singleVals = (double*) mxGetPr(vecData);
rowLen = mxGetN(vecData);
colLen = mxGetM(vecData);
//we don't care if it is a column or row vector but it has to be a
//SCORENUM x 1 vector.
if ((rowLen == 1 && colLen == scorelength) || (rowLen == scorelength && colLen == 1)) {
for (int i = 0; i < scorelength; i++) {
scoreVec[i] = (int) singleVals[i];
}
} else {
mexErrMsgTxt("The score vector has the wrong number of entries!");
}
}
// Returns the number of decimal digits of `number`: 1 for zero, the digit
// count for positive values. For negative values the result is 0.
int getMagnitude(int number) {
    //zero is the only value that has a digit although the loop never runs.
    int digits = (number == 0) ? 1 : 0;
    for (int rest = number; rest > 0; rest /= 10) {
        ++digits;
    }
    return digits;
}