First of all, sorry for my bad English. I started with Objective-C only a few weeks ago. I am working on a project that requires me to compare two audio signals recorded on two iOS devices. So far I have managed to record two .aif files, one on an iPhone 4S and one on an iPhone 4. I then tried to apply the algorithm from the paper "A Highly Robust Audio Fingerprinting System" by Jaap Haitsma to get a fingerprint (a binary bit pattern like 101011010...) from each recording and compare the two fingerprints bit by bit. So far the match rate I get is only between 45% and 55%, which is about what you would expect from comparing two random bit strings. Can anyone give me some advice? The comparison itself is just counting how many bits agree between the two fingerprint strings, roughly like this (a simplified sketch of what I do; fingerprintA and fingerprintB stand for the two NSStrings of '0'/'1' characters built by the fingerprint code below):
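NSUInteger matchingBits = 0;
NSUInteger totalBits = MIN(fingerprintA.length, fingerprintB.length);
for (NSUInteger k = 0; k < totalBits; ++k) {
    // compare the two fingerprints bit by bit
    if ([fingerprintA characterAtIndex:k] == [fingerprintB characterAtIndex:k]) {
        ++matchingBits;
    }
}
double matchRate = (double)matchingBits / (double)totalBits; // this is the value that stays around 0.45 - 0.55

Here is the rest of the code so far. First, reading the recorded .aif file and passing the samples to the fingerprint calculation: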
CalculateFingerprint *myCalculateFingerprint = [[CalculateFingerprint alloc] init];
SInt16 *inputBuffer;
path4 = [documentsDirectory stringByAppendingPathComponent:fileName4];

///////// Calculate the fingerprint for the iPhone 4 recording
fileURL = [NSURL fileURLWithPath:path4];

// Open the .aif file
status = AudioFileOpenURL((__bridge CFURLRef)fileURL, kAudioFileReadPermission, kAudioFileAIFFType, &myAudioFile);

// Get the total number of packets in the file
status = AudioFileGetPropertyInfo(myAudioFile,
                                  kAudioFilePropertyAudioDataPacketCount,
                                  &propertySizeDataPacketCount,
                                  &writabilityDataPacketCount);
status = AudioFileGetProperty(myAudioFile,
                              kAudioFilePropertyAudioDataPacketCount,
                              &propertySizeDataPacketCount,
                              &numberOfPackets);

// Get the maximum packet size
status = AudioFileGetPropertyInfo(myAudioFile,
                                  kAudioFilePropertyMaximumPacketSize,
                                  &propertySizeMaxPacketSize,
                                  &writabilityMaxPacketSize);
status = AudioFileGetProperty(myAudioFile,
                              kAudioFilePropertyMaximumPacketSize,
                              &propertySizeMaxPacketSize,
                              &maxPacketSize);

// Read all packets into one buffer and treat them as 16-bit samples
inputBuffer = (SInt16 *)malloc(numberOfPackets * maxPacketSize);
currentPacket = 0;
status = AudioFileReadPackets(myAudioFile,
                              false,
                              &numberOfBytesRead,
                              NULL,
                              currentPacket,
                              &numberOfPackets,
                              inputBuffer);

[myCalculateFingerprint calculateFingerprint:inputBuffer sampleCount:numberOfPackets index:indexFile];

status = AudioFileClose(myAudioFile);
And here is the fingerprint calculation:
-(void)calculateFingerprint:(SInt16*)samples
                sampleCount:(int)sampleCount
                      index:(int)indexFile {
    // Divide the audio signal into 32 frames
    frames myFrames[32];
    int stepFrames = sampleCount / 62;
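    // each frame is 32*stepFrames samples long and starts stepFrames samples after the previous one,
    // so consecutive frames overlap by 31/32 of their length (as described in the paper)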
    int number = 0;
    int index;
    for (int i = 0; i < 32; ++i) {
        index = 0;
        myFrames[i].start = number;
        myFrames[i].end = number + (32 * stepFrames);
        myFrames[i].dataFrames = (SInt16*)malloc((myFrames[i].end - number + 1) * sizeof(SInt16));
        for (int j = number; j <= myFrames[i].end; ++j) {
            myFrames[i].dataFrames[index] = samples[j];
            ++index;
        }
        number = number + stepFrames;
    }
    // Calculate the FFT for each of the audio signal frames
    CalculateFFT *myCalculateFFT = [[CalculateFFT alloc] init];
    theFFT myFFTData[32];
    for (int i = 0; i < 32; ++i) {
        myFFTData[i].FFTdata = [myCalculateFFT calculateFFTForData:myFrames[i].dataFrames];
    }
    // Each FFT index represents a frequency as follows:
    // index i is frequency i * 44100 / 1024
    // We only need 33 bands between 300 Hz and 2000 Hz, so we take the FFTdata at indices 7 to 39
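    // e.g. index 7 corresponds to roughly 7 * 44100 / 1024 ≈ 301 Hz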
    float energy[33][33];
    for (int i = 0; i < 33; ++i) {
        energy[0][i] = 0;
    }
    int stepBand;
    for (int i = 1; i < 33; ++i) {
        for (int j = 0; j < 33; ++j) {
            energy[i][j] = myFFTData[i].FFTdata[j + 7];
        }
    }
    // next we calculate the bits for the audio fingerprint
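    // bit(i,j) is 1 when E(i+1,j) - E(i+1,j+1) - (E(i,j) - E(i,j+1)) > 0,
    // which is the energy-difference formula from the Haitsma paper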
    Float32 check = 0;
    int fingerPrint[32][32];
    NSMutableString *result = [[NSMutableString alloc] init];
    for (int i = 0; i < 32; ++i) {
        for (int j = 0; j < 32; ++j) {
            check = energy[i + 1][j] - energy[i + 1][j + 1] - energy[i][j] + energy[i][j + 1];
            if (check > 0) {
                fingerPrint[i][j] = 1;
            } else {
                fingerPrint[i][j] = 0;
            }
            [result appendFormat:@"%d", fingerPrint[i][j]];
        }
    }
}
And finally the FFT calculation code:
// (uses the Accelerate framework: #import <Accelerate/Accelerate.h>)
-(void)FFTSetup {
    UInt32 maxFrames = 1024;
    originalReal = (float*)malloc(maxFrames * sizeof(float));
    originalRealTransfer = (float*)malloc(maxFrames * sizeof(float));
    obtainedReal = (float*)malloc(maxFrames * sizeof(float));
    freqArray = (Float32*)malloc((maxFrames / 2) * sizeof(Float32));
    fftLog2n = log2f(maxFrames);
    fftN = 1 << fftLog2n;
    fftNOver2 = maxFrames / 2;
    fftBufferCapacity = maxFrames;
    fftIndex = 0;
    fftA.realp = (float*)malloc(fftNOver2 * sizeof(float));
    fftA.imagp = (float*)malloc(fftNOver2 * sizeof(float));
    fftSetup = vDSP_create_fftsetup(fftLog2n, FFT_RADIX2);
}
-(Float32*)calculateFFTForData:(SInt16*)sampleData {
    [self FFTSetup]; // (re)allocate the FFT buffers for this call
    int stride = 1;

    // Convert the 16-bit samples to floats
    for (int i = 0; i < fftN; ++i) {
        originalReal[i] = (float)sampleData[i];
    }

    UInt32 maxFrames = 1024;

    // Apply a Hann window to the data
    int windowSize = maxFrames;
    float *window = (float*)malloc(sizeof(float) * windowSize);
    memset(window, 0, sizeof(float) * windowSize);
    vDSP_hann_window(window, windowSize, vDSP_HANN_NORM);
    vDSP_vmul(originalReal, 1, window, 1, originalRealTransfer, 1, windowSize);

    // Pack the windowed samples into split-complex form (even samples -> realp, odd -> imagp)
    vDSP_ctoz((COMPLEX*)originalRealTransfer, 2, &fftA, 1, fftNOver2);
    vDSP_fft_zrip(fftSetup, &fftA, stride, fftLog2n, FFT_FORWARD);
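    // vDSP_fft_zrip produces output scaled by a factor of 2 relative to the mathematical DFT,
    // so rescale by 1/(2*N)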
    float scale = (float)1.0 / (2 * fftN);
    vDSP_vsmul(fftA.realp, 1, &scale, fftA.realp, 1, fftNOver2);
    vDSP_vsmul(fftA.imagp, 1, &scale, fftA.imagp, 1, fftNOver2);

    // Unpack the split-complex result back into interleaved (real, imag) pairs
    vDSP_ztoc(&fftA, 1, (COMPLEX*)obtainedReal, 2, fftNOver2);

    // Squared magnitude for each of the fftN/2 frequency bins
    int index = 0;
    NSMutableString *testResult = [[NSMutableString alloc] init];
    for (int i = 0; i < fftN; i = i + 2) {
        freqArray[index] = (obtainedReal[i] * obtainedReal[i]) + (obtainedReal[i + 1] * obtainedReal[i + 1]);
        [testResult appendFormat:@"%f ", freqArray[index]];
        ++index;
    }
    return freqArray;
}