我是一个分析电生理数据的 Matlab 用户。最近我发现如果相对偏移量(从当前位置到新位置的字节距离)大于 10^5 或 10^6 左右,则通过 NFS 共享文件进行 fseek 会很慢。
我用 C 检查了这个,似乎 fseek 很慢,而 lseek 不是。有什么好方法可以提高 NFS 上的 fseek 速度吗?将文件复制到本地驱动器不是一种选择,因为我正在处理实时数据。
当相对偏移量小于此值时,fseek 的耗时约为 10^-7 秒;当相对偏移量大于此值时,fseek 的耗时约为 10^-4 秒。无论偏移量大小如何,lseek 的耗时都在 10^-7 秒量级。我使用的是 CentOS6,并分别在 Matlab 和 Eclipse 中测试过。
欢迎任何建议。
下面是我的测试代码。由于 fseek 在内部会将 SEEK_CUR 转换为 SEEK_SET,因此即使在本应使用 SEEK_CUR 的地方,我也直接使用了 SEEK_SET。
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
int test_fseek(char *fn);
int test_seek(char *fn);
void make_testfile(char *fn, long long fsize);
/**
 * Entry point: make sure the test file exists, then run the buffered
 * (fseek) and unbuffered (lseek) benchmarks against it.
 *
 * Both benchmarks are always run, even if the first one fails, so that a
 * single invocation yields as much output as possible.
 *
 * @return EXIT_SUCCESS when both benchmarks succeed, EXIT_FAILURE otherwise.
 */
int main(void)
{
    /* Local alternative for comparison: "/tmp/seek_test_junk" */
    /* NOTE(review): the mounts listed with this program show ".../share";
     * confirm that "shar" in this path is intended. */
    char *fn = "/net/tera4/raiddata/0/_NAS_NFS_Exports_/shar/seek_test_junk"; /* over nfs */
    long long fsize = 1000LL * 1000 * 1000 * 4;
    int status = EXIT_SUCCESS;

    make_testfile(fn, fsize);

    /* Do not discard the benchmark results: reflect failures in the exit code. */
    if (test_fseek(fn) != 0) {
        status = EXIT_FAILURE;
    }
    if (test_seek(fn) != 0) {
        status = EXIT_FAILURE;
    }
    return status;
}
/*
 * Clock used for interval timing in test_fseek(). The monotonic clock is
 * immune to wall-clock adjustments; it is optional in POSIX, so fall back
 * to the realtime clock when it is not defined.
 */
#if defined(CLOCK_MONOTONIC)
#define FSEEK_TIMER_CLOCK CLOCK_MONOTONIC
#else
#define FSEEK_TIMER_CLOCK CLOCK_REALTIME
#endif

/**
 * Benchmark stdio positioning: repeatedly fseek() forward by a shrinking
 * skip distance and fread() 20 bytes, printing the time of each call.
 *
 * The skip distance starts at 1e7 bytes and is divided by 1.5 after every
 * iteration until it drops below 10000 bytes. (To sweep upward instead,
 * start at 10000 and multiply by 1.5; the upper bound in the loop condition
 * then terminates the sweep.) Absolute offsets with SEEK_SET are used.
 *
 * @param fn  path of the file to read; it must be large enough to cover the
 *            sum of all skips plus the reads, otherwise a read error is
 *            reported.
 * @return 0 on success, -1 if the file cannot be opened or a seek/read fails.
 */
int test_fseek(char *fn)
{
    char read_buff[20];
    const size_t read_size = sizeof read_buff;
    long seek_size, curr_pos = 0;
    struct timespec ts0, ts1;
    double rlapse, slapse;
    int rc = -1;
    FILE *fd;

    /* Binary mode: arbitrary SEEK_SET offsets are only well defined for
     * binary streams (no difference on POSIX systems). */
    fd = fopen(fn, "rb");
    if (fd == NULL) {
        fprintf(stderr, "File open error.\n");
        return -1;
    }

    if (fread(read_buff, 1, read_size, fd) < read_size) {
        fprintf(stderr, "Error reading at 0.\n");
        goto out;
    }
    curr_pos += (long)read_size;

    seek_size = 10000000L;
    while (seek_size <= 10000000L && 10000L <= seek_size) {
        clock_gettime(FSEEK_TIMER_CLOCK, &ts0);
        if (0 != fseek(fd, seek_size + curr_pos, SEEK_SET)) {
            fprintf(stderr, "Error seeking at seek_size:%ld\n", seek_size);
            goto out;
        }
        clock_gettime(FSEEK_TIMER_CLOCK, &ts1);
        /* Computed in double so the seconds term cannot overflow a 32-bit long. */
        slapse = (double)(ts1.tv_sec - ts0.tv_sec)
               + 1.0e-9 * (double)(ts1.tv_nsec - ts0.tv_nsec);
        curr_pos += seek_size;

        clock_gettime(FSEEK_TIMER_CLOCK, &ts0);
        if (fread(read_buff, 1, read_size, fd) < read_size) {
            fprintf(stderr, "Error reading at seek_size: %ld\n", seek_size);
            goto out;
        }
        clock_gettime(FSEEK_TIMER_CLOCK, &ts1);
        rlapse = (double)(ts1.tv_sec - ts0.tv_sec)
               + 1.0e-9 * (double)(ts1.tv_nsec - ts0.tv_nsec);
        curr_pos += (long)read_size;

        printf("Skipsize\t%ld\tRead\t%g\tFseek\t%g\n", seek_size, rlapse, slapse);
        seek_size = (long)(seek_size / 1.5);
    }
    rc = 0;

out:
    /* Single exit path so the stream is released on errors as well. */
    fclose(fd);
    if (rc == 0) {
        printf("Finish\n");
    }
    return rc;
}
/*
 * Clock used for interval timing in test_seek(). The monotonic clock is
 * immune to wall-clock adjustments; it is optional in POSIX, so fall back
 * to the realtime clock when it is not defined.
 */
#if defined(CLOCK_MONOTONIC)
#define LSEEK_TIMER_CLOCK CLOCK_MONOTONIC
#else
#define LSEEK_TIMER_CLOCK CLOCK_REALTIME
#endif

/**
 * Benchmark raw descriptor positioning: repeatedly lseek() forward by a
 * shrinking skip distance and read() 20 bytes, printing the time of each
 * call.
 *
 * The skip distance starts at 1e7 bytes and is divided by 1.5 after every
 * iteration until it drops below 10000 bytes. (To sweep upward instead,
 * start at 10000 and multiply by 1.5; the upper bound in the loop condition
 * then terminates the sweep.) Absolute offsets with SEEK_SET are used.
 *
 * @param fn  path of the file to read; it must be large enough to cover the
 *            sum of all skips plus the reads, otherwise a read error is
 *            reported.
 * @return 0 on success, -1 if the file cannot be opened or a seek/read fails.
 */
int test_seek(char *fn)
{
    char read_buff[20];
    const size_t read_size = sizeof read_buff;
    long seek_size, curr_pos = 0;
    struct timespec ts0, ts1;
    double rlapse, slapse;
    int rc = -1;
    int fd;

    fd = open(fn, O_RDONLY);
    /* open() reports failure as -1, so test for any negative value. */
    if (fd < 0) {
        fprintf(stderr, "File open error.\n");
        return -1;
    }

    /* read() yields -1 on error and may return short; both count as failure. */
    if (read(fd, read_buff, read_size) < (ssize_t)read_size) {
        fprintf(stderr, "Error reading at 0.\n");
        goto out;
    }
    curr_pos += (long)read_size;

    seek_size = 10000000L;
    while (seek_size <= 10000000L && 10000L <= seek_size) {
        clock_gettime(LSEEK_TIMER_CLOCK, &ts0);
        if (0 > lseek(fd, seek_size + curr_pos, SEEK_SET)) {
            fprintf(stderr, "Error seeking at seek_size: %ld\n", seek_size);
            goto out;
        }
        clock_gettime(LSEEK_TIMER_CLOCK, &ts1);
        /* Computed in double so the seconds term cannot overflow a 32-bit long. */
        slapse = (double)(ts1.tv_sec - ts0.tv_sec)
               + 1.0e-9 * (double)(ts1.tv_nsec - ts0.tv_nsec);
        curr_pos += seek_size;

        clock_gettime(LSEEK_TIMER_CLOCK, &ts0);
        if (read(fd, read_buff, read_size) < (ssize_t)read_size) {
            fprintf(stderr, "Error reading at seek_size: %ld\n", seek_size);
            goto out;
        }
        clock_gettime(LSEEK_TIMER_CLOCK, &ts1);
        rlapse = (double)(ts1.tv_sec - ts0.tv_sec)
               + 1.0e-9 * (double)(ts1.tv_nsec - ts0.tv_nsec);
        curr_pos += (long)read_size;

        printf("Skipsize\t%ld\tRead\t%g\tLseek\t%g\n", seek_size, rlapse, slapse);
        seek_size = (long)(seek_size / 1.5);
    }
    rc = 0;

out:
    /* Single exit path so the descriptor is released on errors as well. */
    close(fd);
    if (rc == 0) {
        printf("Finish\n");
    }
    return rc;
}
/**
 * Ensure that a file of at least fsize bytes of pseudo-random data exists.
 *
 * If fn already exists and is at least fsize bytes long it is left alone;
 * otherwise it is (re)created and filled with exactly fsize bytes produced
 * by rand(), written in blocks of at most 100e6 ints. Progress and
 * diagnostics are printed; errors are reported on stderr and the function
 * returns without a status (the signature is void).
 *
 * @param fn     path of the test file.
 * @param fsize  required size in bytes; negative values are treated as 0.
 */
void make_testfile(char *fn, long long fsize)
{
    const long long int_size = (long long)sizeof(int);
    long long blk_bytes = 1000LL * 1000 * 100 * int_size;
    long long written = 0;
    long long j;
    int *rndbuf;
    FILE *fd;

    if (fsize < 0) {
        fsize = 0;
    }

    /* Test whether a sufficiently large file already exists. Seeking to an
     * offset beyond EOF succeeds, so the size has to be measured from the
     * end of the file rather than probed with fseek(fd, fsize, SEEK_SET). */
    fd = fopen(fn, "rb");
    if (fd != NULL) {
        long long existing = -1;

        if (0 == fseek(fd, 0L, SEEK_END)) {
            /* ftell() returns long; where long is 32 bits a file over 2 GiB
             * yields -1 here and is simply regenerated. */
            existing = ftell(fd);
        }
        fclose(fd);
        if (existing >= fsize) {
            printf("%s already exists.\n", fn);
            return;
        }
        printf("%s is too small.\n", fn);
    } else {
        printf("%s does not exist.\n", fn);
    }

    /* Do not allocate more than needed for small files; keep the buffer a
     * whole, non-zero number of ints. */
    if (fsize < blk_bytes) {
        blk_bytes = (fsize + int_size - 1) / int_size * int_size;
        if (blk_bytes < int_size) {
            blk_bytes = int_size;
        }
    }

    /* Allocate before opening for writing so that an allocation failure
     * does not truncate an existing file. */
    rndbuf = malloc((size_t)blk_bytes);
    if (rndbuf == NULL) {
        fprintf(stderr, "Memory alloc error.\n");
        return;
    }

    fd = fopen(fn, "wb");
    if (fd == NULL) {
        fprintf(stderr, "File open error.\n");
        free(rndbuf);
        return;
    }

    while (written < fsize) {
        long long chunk = fsize - written;
        long long nints;

        if (chunk > blk_bytes) {
            chunk = blk_bytes;
        }
        /* Refill every int that contributes at least one byte to this chunk. */
        nints = (chunk + int_size - 1) / int_size;
        for (j = 0; j < nints; j++) {
            rndbuf[j] = rand();
        }
        if (fwrite(rndbuf, 1, (size_t)chunk, fd) != (size_t)chunk) {
            fprintf(stderr, "File write error.\n");
            break;
        }
        written += chunk;
        printf("%lld bytes written.\n", written);
    }

    free(rndbuf);
    /* fclose() flushes buffered data, so a failure here means lost writes. */
    if (fclose(fd) != 0) {
        fprintf(stderr, "File write error.\n");
    }
}
附加信息:(部分IP地址改为xx或yy)
nfsstat -m 的结果:
/net/p390/common from p390:/common/
Flags: rw,nosuid,nodev,relatime,vers=4,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=xx.xx.xx.201,minorversion=0,local_lock=none,addr=xx.xx.xx.202
/net/tera1/raid0/data/_NAS_NFS_Exports_ from tera1:/raid0/data/_NAS_NFS_Exports_
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=262144,wsize=262144,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=xx.xx.xx.245,mountvers=3,mountport=52591,mountproto=udp,local_lock=none,addr=xx.xx.xx.245
/net/tera1/raid0/data/_NAS_NFS_Exports_/share from tera1:/raid0/data/_NAS_NFS_Exports_/share
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=262144,wsize=262144,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=xx.xx.xx.245,mountvers=3,mountport=52591,mountproto=udp,local_lock=none,addr=xx.xx.xx.245
/net/z80/home from z80:/home/
Flags: rw,nosuid,nodev,relatime,vers=4,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=xx.xx.xx.201,minorversion=0,local_lock=none,addr=xx.xx.xx.205
/net/tera4/raiddata/0/_NAS_NFS_Exports_ from tera4:/raiddata/0/_NAS_NFS_Exports_
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=xx.xx.xx.247,mountvers=3,mountport=49573,mountproto=udp,local_lock=none,addr=xx.xx.xx.247
/net/tera4/raiddata/0/_NAS_NFS_Exports_/share from tera4:/raiddata/0/_NAS_NFS_Exports_/share
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=524288,wsize=524288,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=xx.xx.xx.247,mountvers=3,mountport=49573,mountproto=udp,local_lock=none,addr=xx.xx.xx.247
/net/p390/home from p390:/home/
Flags: rw,nosuid,nodev,relatime,vers=4,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=xx.xx.xx.201,minorversion=0,local_lock=none,addr=xx.xx.xx.202
/net/sinuhe/data from sinuhe:/data
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=yy.yy.yy.71,mountvers=3,mountport=892,mountproto=udp,local_lock=none,addr=yy.yy.yy.71
/net/sinuhe/opt from sinuhe:/opt
Flags: rw,nosuid,nodev,relatime,vers=3,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,mountaddr=yy.yy.yy.71,mountvers=3,mountport=892,mountproto=udp,local_lock=none,addr=yy.yy.yy.71
mount -v 的结果:
/dev/mapper/vg_megprec-LogVol01 on / type ext4 (rw)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw,rootcontext="system_u:object_r:tmpfs_t:s0")
/dev/sdf1 on /boot type ext4 (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
p390:/common on /net/p390/common type nfs (rw,nosuid,nodev,intr,sloppy,vers=4,addr=xx.xx.xx.202,clientaddr=xx.xx.xx.201)
tera1:/raid0/data/_NAS_NFS_Exports_ on /net/tera1/raid0/data/_NAS_NFS_Exports_ type nfs (rw,nosuid,nodev,intr,sloppy,addr=xx.xx.xx.245)
tera1:/raid0/data/_NAS_NFS_Exports_/share on /net/tera1/raid0/data/_NAS_NFS_Exports_/share type nfs (rw,nosuid,nodev,intr,sloppy,addr=xx.xx.xx.245)
z80:/home on /net/z80/home type nfs (rw,nosuid,nodev,intr,sloppy,vers=4,addr=xx.xx.xx.205,clientaddr=xx.xx.xx.201)
tera4:/raiddata/0/_NAS_NFS_Exports_ on /net/tera4/raiddata/0/_NAS_NFS_Exports_ type nfs (rw,nosuid,nodev,intr,sloppy,addr=xx.xx.xx.247)
tera4:/raiddata/0/_NAS_NFS_Exports_/share on /net/tera4/raiddata/0/_NAS_NFS_Exports_/share type nfs (rw,nosuid,nodev,intr,sloppy,addr=xx.xx.xx.247)
p390:/home on /net/p390/home type nfs (rw,nosuid,nodev,intr,sloppy,vers=4,addr=xx.xx.xx.202,clientaddr=xx.xx.xx.201)
sinuhe:/data on /net/sinuhe/data type nfs (rw,nosuid,nodev,intr,sloppy,addr=yy.yy.yy.71)
sinuhe:/opt on /net/sinuhe/opt type nfs (rw,nosuid,nodev,intr,sloppy,addr=yy.yy.yy.71)