(注意:我看过这篇帖子《C:pthread 的性能低于单线程》,请告诉我这是不是同一个问题。)
我正在学习 pthread 库。我为同一个 C 程序编写了两个版本:该程序接收一组大型 BAM 文件,并使用 samtools 库统计每个文件中的记录数。
这是单线程程序:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "bam.h"
/** Thread-count limit; not referenced by the single-threaded code shown here. */
static const int MAX_COUNT_THREADS = 4;
/** Job description handed to scan_bam(): one input file per instance. */
struct Param
{
    /** Path of the file to scan; main() points it at an argv entry. */
    char *filename;
};
/**
 * Print one result line, "<filename>\t<count>\n", on stdout.
 *
 * @param filename  text printed in the first column
 * @param count     record count printed in the second column
 */
static void printCount(const char* filename,unsigned long count)
{
    /* %lu matches unsigned long; %ld would show large counts as negative. */
    fprintf(stdout,"%s\t%lu\n",filename,count);
}
/**
 * Count the records of one BAM file and print the result.
 *
 * Logs a start and an end timestamp on stderr, prints the count through
 * printCount(), then frees the Param it was given.
 *
 * @param ptr  heap-allocated struct Param; this function frees it
 * @return always NULL
 *
 * Terminates the process with EXIT_FAILURE when the file cannot be opened,
 * its header cannot be read, or the record buffer cannot be allocated.
 */
static void* scan_bam(void* ptr)
{
    unsigned long count=0;
    struct Param* params=(struct Param*)ptr;
    bamFile in=bam_open(params->filename, "r") ;
    bam_header_t *header= NULL;
    bam1_t *b= NULL;
    int nread;
    time_t rawtime;

    time ( &rawtime );
    /* ctime() text already ends with '\n', so the format adds none. */
    fprintf(stderr,"STARTING : %s %s",params->filename,ctime(&rawtime));
    if(in==0)
    {
        fprintf(stderr,"Cannot read %s.\n",params->filename);
        exit(EXIT_FAILURE);
    }
    header= bam_header_read(in);
    if(header==NULL)
    {
        /* Without a valid header the record loop would parse garbage. */
        fprintf(stderr,"Cannot read header of %s.\n",params->filename);
        exit(EXIT_FAILURE);
    }
    b=bam_init1();
    if(b==NULL)
    {
        fprintf(stderr,"Out of memory.\n");
        exit(EXIT_FAILURE);
    }
    while((nread=bam_read1(in, b)) > 0)
    {
        ++count;
    }
    /* NOTE(review): samtools 0.1.x bam_read1() appears to return -1 at a
       clean end of file and a smaller value on a short read - confirm
       against the linked library version. */
    if(nread < -1)
    {
        fprintf(stderr,"Warning: truncated or corrupt record in %s (bam_read1 returned %d); count may be incomplete.\n",params->filename,nread);
    }
    bam_destroy1(b);
    bam_header_destroy(header);
    bam_close(in);
    printCount(params->filename,count);
    time ( &rawtime );
    fprintf(stderr,"end for %s %s",params->filename,ctime(&rawtime));
    free(params);
    return NULL;
}
/**
 * Entry point: scans every file named on the command line, one after another.
 *
 * @return EXIT_SUCCESS once all arguments were processed; exits with
 *         EXIT_FAILURE if a job descriptor cannot be allocated.
 */
int main(int argc,char** argv)
{
    int arg_index;

    for(arg_index=1; arg_index<argc; ++arg_index)
    {
        struct Param* job=malloc(sizeof *job);
        if(job==NULL)
        {
            fprintf(stderr,"Out of memory.\n");
            exit(EXIT_FAILURE);
        }
        job->filename=argv[arg_index];
        /* scan_bam() releases the job before it returns. */
        scan_bam(job);
    }
    return EXIT_SUCCESS;
}
这是多线程程序。该程序最多可以使用 5 个线程,用一个条件变量(及其互斥锁)来统计正在运行的线程数,并在有线程结束时通知主线程可以启动新线程。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <time.h>
#include "bam.h"
/**
 * Abort the program when a status code is non-zero.
 *
 * Prints the source location and the status on stderr, then exits with
 * EXIT_FAILURE. The argument is evaluated exactly once and may be any
 * int expression: it is captured in a local, so operator precedence inside
 * the argument cannot change the comparison.
 */
#define VERIFY_ZERO(a) do { const int verify_zero_status_=(a); if(verify_zero_status_!=0) {\
fprintf(stderr,"Test failed at %s line %d (ret=%d).\n",__FILE__,__LINE__,verify_zero_status_);\
exit(EXIT_FAILURE);\
}} while(0)
/** Thread-count limit that main() compares against the running-thread counter. */
static const int MAX_COUNT_THREADS = 5;
/** Job description handed to a scan_bam() worker: one input file per instance. */
struct Param
{
    /** Storage for the handle of the worker thread serving this job. */
    pthread_t thread;
    /** Path of the file to scan; main() points it at an argv entry. */
    char *filename;
};
/** Process-wide synchronisation state shared by main() and the workers. */
struct GLOBALS {
    /** Serialises result printing. */
    pthread_mutex_t mutex_print;
    /** Signalled by a worker when it finishes, so a slot may be free. */
    pthread_cond_t accept_new_thread;
    /** Mutex paired with accept_new_thread; guards number_of_threads. */
    pthread_mutex_t accept_new_thread_lock;
    /** Count of workers currently accounted as running. */
    int number_of_threads;
};

/** The single shared instance, statically initialised with no running workers. */
static struct GLOBALS globals = {
    .mutex_print            = PTHREAD_MUTEX_INITIALIZER,
    .accept_new_thread      = PTHREAD_COND_INITIALIZER,
    .accept_new_thread_lock = PTHREAD_MUTEX_INITIALIZER,
    .number_of_threads      = 0
};
/**
 * Print one result line, "<filename>\t<count>\n", on stdout while holding
 * the print mutex.
 *
 * @param filename  text printed in the first column
 * @param count     record count printed in the second column
 *
 * Exits through VERIFY_ZERO if the mutex cannot be locked or unlocked.
 */
static void printCount(const char* filename,unsigned long count)
{
    int ret=pthread_mutex_lock(&globals.mutex_print);
    VERIFY_ZERO(ret);
    /* %lu matches unsigned long; %ld would show large counts as negative. */
    fprintf(stdout,"%s\t%lu\n",filename,count);
    ret=pthread_mutex_unlock(&globals.mutex_print);
    VERIFY_ZERO(ret);
}
/**
 * Write "<label> <filename> <timestamp>" to stderr while holding the print
 * mutex.
 *
 * ctime() returns a pointer to a shared static buffer, so both the call and
 * the use of its result stay inside the lock to keep concurrent workers from
 * overwriting each other's timestamp. The ctime() text already ends with a
 * newline.
 */
static void log_event(const char* label,const char* filename)
{
    time_t rawtime;
    int ret=pthread_mutex_lock(&globals.mutex_print);
    VERIFY_ZERO(ret);
    time ( &rawtime );
    fprintf(stderr,"%s %s %s",label,filename,ctime(&rawtime));
    ret=pthread_mutex_unlock(&globals.mutex_print);
    VERIFY_ZERO(ret);
}

/**
 * Thread body: count the records of one BAM file and print the result.
 *
 * Logs start/end timestamps on stderr, prints the count through
 * printCount(), decrements the running-thread counter and signals
 * accept_new_thread, then frees the Param it was given.
 *
 * @param ptr  heap-allocated struct Param; this function frees it
 * @return always NULL
 *
 * Terminates the whole process with EXIT_FAILURE when the file cannot be
 * opened, its header cannot be read, the record buffer cannot be allocated,
 * or a pthread call fails.
 */
static void* scan_bam(void* ptr)
{
    unsigned long count=0;
    struct Param* params=(struct Param*)ptr;
    bamFile in=bam_open(params->filename, "r") ;
    bam_header_t *header= NULL;
    bam1_t *b= NULL;
    int nread;
    int ret;

    log_event("STARTING :",params->filename);
    if(in==0)
    {
        fprintf(stderr,"Cannot read %s.\n",params->filename);
        exit(EXIT_FAILURE);
    }
    header= bam_header_read(in);
    if(header==NULL)
    {
        /* Without a valid header the record loop would parse garbage. */
        fprintf(stderr,"Cannot read header of %s.\n",params->filename);
        exit(EXIT_FAILURE);
    }
    b=bam_init1();
    if(b==NULL)
    {
        fprintf(stderr,"Out of memory.\n");
        exit(EXIT_FAILURE);
    }
    while((nread=bam_read1(in, b)) > 0)
    {
        ++count;
    }
    /* NOTE(review): samtools 0.1.x bam_read1() appears to return -1 at a
       clean end of file and a smaller value on a short read - confirm
       against the linked library version. */
    if(nread < -1)
    {
        fprintf(stderr,"Warning: truncated or corrupt record in %s (bam_read1 returned %d); count may be incomplete.\n",params->filename,nread);
    }
    bam_destroy1(b);
    bam_header_destroy(header);
    bam_close(in);
    printCount(params->filename,count);
    log_event("end1 for",params->filename);

    /* Give the slot back and wake main(), which waits on this condition. */
    ret=pthread_mutex_lock( &globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);
    globals.number_of_threads--;
    ret=pthread_cond_signal(&globals.accept_new_thread);
    VERIFY_ZERO(ret);
    ret=pthread_mutex_unlock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);

    log_event("end2 for",params->filename);
    free(params);
    return NULL;
}
/**
 * Entry point: starts one detached scan_bam() worker per command-line file,
 * keeping at most MAX_COUNT_THREADS of them running, then waits until every
 * worker has finished.
 *
 * @return EXIT_SUCCESS once all workers are done; the process exits with
 *         EXIT_FAILURE on allocation failure or when a pthread call fails.
 */
int main(int argc,char** argv)
{
    int optind=1;
    int ret;
    while(optind<argc)
    {
        pthread_t worker;
        struct Param* params=(struct Param*)malloc(sizeof(struct Param));
        if(params==0)
        {
            fprintf(stderr,"Out of memory.\n");
            exit(EXIT_FAILURE);
        }
        ret=pthread_mutex_lock(&globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);
        /* ">=" keeps the limit exact; ">" let one extra worker start. */
        while (globals.number_of_threads >= MAX_COUNT_THREADS)
        {
            ret=pthread_cond_wait(&globals.accept_new_thread, &globals.accept_new_thread_lock);
            VERIFY_ZERO(ret);
        }
        globals.number_of_threads++;
        ret=pthread_mutex_unlock(&globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);
        params->filename=argv[optind++];
        fprintf(stderr,"creating %s\n",params->filename);
        /* The handle lives in a local: the worker frees params when it
           finishes, so params must not be touched after the thread starts. */
        ret=pthread_create (&worker, NULL, scan_bam,params);
        /* A failed create would leave the counter raised and hang the final wait. */
        VERIFY_ZERO(ret);
        ret=pthread_detach(worker);
        VERIFY_ZERO(ret);
    }
    /* Drain: every finishing worker signals the condition after decrementing. */
    ret=pthread_mutex_lock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);
    while (globals.number_of_threads > 0)
    {
        ret=pthread_cond_wait(&globals.accept_new_thread, &globals.accept_new_thread_lock);
        VERIFY_ZERO(ret);
    }
    ret=pthread_mutex_unlock(&globals.accept_new_thread_lock);
    VERIFY_ZERO(ret);
    pthread_cond_destroy(&globals.accept_new_thread);
    return EXIT_SUCCESS;
}
编译运行多线程程序
gcc -O3 -Wall jeter.c -pthread -I/usr/local/package/samtools-0.1.18 -L/usr/local/package/samtools-0.1.18/ -lbam -lz
$ time (find .// -name "*recal.bam" | grep Item1[0-9] | xargs ./a.out )
creating ./Item10/recal.bam
creating ./Item11/recal.bam
creating ./Item12/recal.bam
creating ./Item13/recal.bam
creating ./Item14/recal.bam
creating ./Item15/recal.bam
STARTING : ./Item10/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item11/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item12/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item14/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item13/recal.bam Tue Dec 18 15:12:48 2012
STARTING : ./Item15/recal.bam Tue Dec 18 15:12:48 2012
./Item10/recal.bam 185784310
end1 for ./Item10/recal.bam Tue Dec 18 15:38:16 2012
end2 for ./Item10/recal.bam Tue Dec 18 15:38:16 2012
creating ./Item16/recal.bam
STARTING : ./Item16/recal.bam Tue Dec 18 15:38:16 2012
./Item11/recal.bam 204408906
end1 for ./Item11/recal.bam Tue Dec 18 15:41:52 2012
end2 for ./Item11/recal.bam Tue Dec 18 15:41:52 2012
creating ./Item17/recal.bam
STARTING : ./Item17/recal.bam Tue Dec 18 15:41:52 2012
./Item12/recal.bam 207766317
end1 for ./Item12/recal.bam Tue Dec 18 15:42:17 2012
end2 for ./Item12/recal.bam Tue Dec 18 15:42:17 2012
creating ./Item18/recal.bam
STARTING : ./Item18/recal.bam Tue Dec 18 15:42:17 2012
./Item15/recal.bam 224957522
end1 for ./Item15/recal.bam Tue Dec 18 15:44:54 2012
end2 for ./Item15/recal.bam Tue Dec 18 15:44:54 2012
creating ./Item19/recal.bam
STARTING : ./Item19/recal.bam Tue Dec 18 15:44:54 2012
./Item13/recal.bam 224548326
end1 for ./Item13/recal.bam Tue Dec 18 15:45:32 2012
end2 for ./Item13/recal.bam Tue Dec 18 15:45:32 2012
./Item14/recal.bam 241267346
end1 for ./Item14/recal.bam Tue Dec 18 15:48:28 2012
end2 for ./Item14/recal.bam Tue Dec 18 15:48:28 2012
./Item16/recal.bam 227446579
end1 for ./Item16/recal.bam Tue Dec 18 16:12:15 2012
end2 for ./Item16/recal.bam Tue Dec 18 16:12:15 2012
./Item17/recal.bam 215307379
end1 for ./Item17/recal.bam Tue Dec 18 16:13:05 2012
end2 for ./Item17/recal.bam Tue Dec 18 16:13:05 2012
./Item18/recal.bam 225914723
end1 for ./Item18/recal.bam Tue Dec 18 16:13:48 2012
end2 for ./Item18/recal.bam Tue Dec 18 16:13:48 2012
./Item19/recal.bam 225509630
end1 for ./Item19/recal.bam Tue Dec 18 16:14:06 2012
end2 for ./Item19/recal.bam Tue Dec 18 16:14:06 2012
.
real 61m17.560s
user 66m0.476s
sys 4m5.980s
编译运行单线程程序
$ gcc -O3 -Wall jeter2.c -I/usr/local/package/samtools-0.1.18 -L/usr/local/package/samtools-0.1.18/ -lbam -lz
time (find .// -name "*recal.bam" | grep Item1[0-9] | xargs ./a.out )
STARTING : ./Item10/recal.bam Tue Dec 18 16:15:25 2012
./Item10/recal.bam 185784310
end for ./Item10/recal.bam Tue Dec 18 16:20:43 2012
STARTING : ./Item11/recal.bam Tue Dec 18 16:20:43 2012
./Item11/recal.bam 204408906
end for ./Item11/recal.bam Tue Dec 18 16:26:20 2012
STARTING : ./Item12/recal.bam Tue Dec 18 16:26:20 2012
./Item12/recal.bam 207766317
end for ./Item12/recal.bam Tue Dec 18 16:31:56 2012
STARTING : ./Item13/recal.bam Tue Dec 18 16:31:56 2012
./Item13/recal.bam 224548326
end for ./Item13/recal.bam Tue Dec 18 16:38:05 2012
STARTING : ./Item14/recal.bam Tue Dec 18 16:38:05 2012
./Item14/recal.bam 241267346
end for ./Item14/recal.bam Tue Dec 18 16:44:59 2012
STARTING : ./Item15/recal.bam Tue Dec 18 16:44:59 2012
./Item15/recal.bam 224957522
end for ./Item15/recal.bam Tue Dec 18 16:50:56 2012
STARTING : ./Item16/recal.bam Tue Dec 18 16:50:56 2012
./Item16/recal.bam 227446579
end for ./Item16/recal.bam Tue Dec 18 16:58:07 2012
STARTING : ./Item17/recal.bam Tue Dec 18 16:58:07 2012
./Item17/recal.bam 215307379
end for ./Item17/recal.bam Tue Dec 18 17:04:58 2012
STARTING : ./Item18/recal.bam Tue Dec 18 17:04:58 2012
./Item18/recal.bam 225914723
end for ./Item18/recal.bam Tue Dec 18 17:11:31 2012
STARTING : ./Item19/recal.bam Tue Dec 18 17:11:31 2012
./Item19/recal.bam 225509630
end for ./Item19/recal.bam Tue Dec 18 17:18:19 2012
.
real 62m54.503s
user 53m39.529s
sys 3m44.580s
这两个程序的运行时间都约为 1 小时,也就是说多线程(MT)程序并不比单线程程序快。为什么?有没有办法让这段代码跑得更快?