代码如下,出问题的部分是 getTop() 函数。
calculate() 函数中有一个 while 循环,
而且输入文件很大:当我用只有 2,000 行的小输入文件测试时,程序从未出错;但换成 130,000 行的大输入文件时,基本上会遇到下面两种错误之一。
#include "UserUserSim.h"
// Builds a UserUserSim whose _query_url member is initialised from query_url.
// The constructor body is empty: no file is opened and nothing is computed here.
UserUserSim::UserUserSim(string &query_url)
    : _query_url(query_url)
{}
// Streams the query file named by _query_url and writes one output line per
// input token to `out`.
//
// The file is read as whitespace-separated tokens:
//   * a token ending in ':' is a movie header; it selects the current movie
//     (looked up through MovieDictionary), sorts that movie's list via
//     sortList(), and is echoed to `out` unchanged;
//   * any other token is parsed with atoi() as a user id; a score for that
//     user against the current movie is written to `out`.
//
// The per-user neighbour list is built at most once: getTop() is only called
// when the user id has no entry in _map yet.
//
// User tokens that arrive while no movie is selected (before the first header,
// or after a header whose lookup produced a null pointer) are skipped, because
// there is nothing to score them against.
void UserUserSim::calculate(ostream &out){
    ifstream infile(_query_url.c_str());
    string line;

    // Starts out null so that a user token preceding every movie header is
    // detected instead of handing an indeterminate pointer to getScore().
    MovieList* ml = NULL;

    while (infile >> line) {
        // operator>> never yields an empty token, so length()-1 is in range.
        if (line[line.length() - 1] == ':') {
            // atoi stops at the trailing ':' and returns the numeric prefix.
            int movie_id = atoi(line.c_str());
            ml = MovieDictionary::getInstance().getMovie(movie_id);
            // NOTE(review): whether getMovie can return null for an unknown id
            // is not visible here; guard in case it can.
            if (ml != NULL)
                ml->sortList();
            out << line << endl;
        } else {
            if (ml == NULL)
                continue;

            int user_id = atoi(line.c_str());
            if (_map.find(user_id) == _map.end())
                getTop(user_id);
            float score = getScore(user_id, ml);
            out << score << endl;
        }
    }
}
// Predicts a score for `user_id` on the movie whose rating list is `ml`.
//
// The neighbour list stored in _map for `user_id` and the movie's rating list
// are walked in lockstep by user id. For every user id present in both, the
// rating is weighted by that neighbour's similarity score; the result is the
// weighted sum divided by the total weight.
//
// Returns the fallback constant 3.37827 when
//   * no neighbour list is stored for `user_id` (or the stored pointer is null),
//   * `ml` is null, or
//   * the accumulated weight is exactly zero (e.g. no common user ids).
//
// `user_id` is taken by non-const reference but is not modified.
//
// NOTE(review): the lockstep walk only finds every match if both sequences are
// in ascending user-id order. getTop() sorts the neighbour list with compareID;
// the ordering of `ml` is presumed to come from sortList() in the caller —
// confirm.
float UserUserSim::getScore(int &user_id, MovieList* ml){
    const float fallback_score = 3.37827;

    // Look the user up with find(): operator[] would insert a null pointer for
    // an unknown user, which would then be dereferenced below.
    if (ml == NULL || _map.find(user_id) == _map.end())
        return fallback_score;
    vector<USim>* p = _map.find(user_id)->second;
    if (p == NULL)
        return fallback_score;

    vector<UserScore>::iterator it = ml->begin();
    vector<USim>::iterator sim_it = p->begin();
    float weighted_sum = 0;
    float total_weight = 0;

    while (it != ml->end() && sim_it != p->end()) {
        const int rater_id = it->user_id;
        const int neighbour_id = sim_it->user_id;

        if (rater_id > neighbour_id) {
            ++sim_it;
        } else if (rater_id < neighbour_id) {
            ++it;
        } else {
            // Same user on both sides: contribute rating * similarity.
            weighted_sum += it->rating * sim_it->score;
            total_weight += sim_it->score;
            ++sim_it;
            ++it;
        }
    }

    // Exact comparison is intentional: it only guards the division below.
    if (total_weight != 0)
        return weighted_sum / total_weight;
    return fallback_score;
}
// Shorthand for an (int, float) pair. Not referenced anywhere in the visible
// part of this file.
typedef pair<int, float> mapPair;
// Ordering predicate for descending score: true when p1's score is strictly
// greater than p2's.
bool compareSim(USim p1, USim p2){
    return p2.score < p1.score;
}
// Ordering predicate for ascending user id: true when p1's user_id is strictly
// smaller than p2's.
bool compareID(USim p1, USim p2){
    return p2.user_id > p1.user_id;
}
void UserUserSim::getTop(int user_id){
vector<USim>* p=new vector<USim>;
_map.insert(pair<int,vector<USim>*>(user_id,p));
UserList* ul=UserDictionary::getInstance().getUser(user_id);
vector<MovieScore>::iterator it;
vector<UserScore>::iterator it_movie; // the iterator for the movielist
vector<USim> score_list;
vector<USim>::iterator it_s1;
vector<USim>::iterator it_s2;
for (it=ul->begin();it!=ul->end();++it){
vector<USim> new_score_list;
int movie_id=(*it).movie_id;
it_s1=score_list.begin();
MovieList* ml=MovieDictionary::getInstance().getMovie(movie_id);
for(it_movie=ml->begin();it_movie!=ml->end();it_movie++){
int user_id=(*it_movie).user_id;
int rating=(*it_movie).rating;
while( it_s1!=score_list.end() && user_id>(*it_s1).user_id){
new_score_list.push_back((*it_s1));
it_s1++;
}
USim us;
us.user_id=user_id;
if (it_s1!=score_list.end() && user_id==(*it_s1).user_id){
us.score=(*it_s1).score+rating*(*it).rating;// old allocated score+ new score
it_s1++;
}else{
us.score=rating*(*it).rating;
}
new_score_list.push_back(us);// the vector's user rating x the rating of the movie,user
}
//copy the new_score_list into the score_list:
score_list.clear();
for (it_s2=new_score_list.begin();it_s2!=new_score_list.end();++it_s2)
score_list.push_back(*it_s2);
}
int k=10;
partial_sort(score_list.begin(),score_list.begin()+k,score_list.end(),compareSim);
vector<USim>::iterator v_it;
//store these users into the user_sim vector
for (v_it=score_list.begin();v_it<score_list.begin()+k;++v_it){
USim us=(*v_it);
p->push_back(us);
}
sort(p->begin(),p->end(),compareID);
}
这是我遇到的第一种错误:
recommend(71304) malloc: *** error for object 0x7f96d3ec8618: incorrect checksum for freed object - object was probably modified after being freed.
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6
或者第二种错误:
Segmentation Fault:11
每次出错时我都检查了出错的位置(断点),每次都不一样,大约在输入文件的第 50,000 行到第 100,000 行之间,所以我猜这是一个内存问题。我还在运行程序之前执行了 ulimit 命令,但错误仍然发生。真希望有人能告诉我错误出在哪里,或者至少教我如何调试这类内存问题。