EDIT: shaved roughly 10% processing time with:
register int16_t *libwordPointer = libword;
int16_t *nReset;
register int16_t *wordsPointer = words[word];
int16_t *mReset = wordsPointer;
for( int n=0 ; n<Rows ; n++ ){
nReset = libwordPointer;
wordsPointer = mReset;
for( int m=0 ; m<Col ; m++ ){
temp = 0;
libwordPointer = nReset;
for( int ii=0 ; ii<Q ; ii++ ){
temp += lookUp( abs( *libwordPointer - *wordsPointer ) );
libwordPointer++;
wordsPointer++;
}
D[n][m] = temp;
}
}
///////////////////////////////////////////////////////////////////////////////////////////
Any expert opinions on how to make this faster???? Award is a million dollars :)
float dtw( int16_t (*words)[L][Q], int16_t (*libword)[Q], int16_t libcount, char word ){
float Dist, k=0;
// int Dn;
register int Rows = libcount;
register float A, B, C;
register float temp;
register int Col = ender[word]-begin[word]+1; // rows for word being tested
for( int n=0 ; n<Rows ; n++ ){
for( int m=0 ; m<Col ; m++ ){
temp = 0;
for( int ii=0 ; ii<Q ; ii++ ){
temp += lookUp(abs(libword[n][ii]-words[word][m][ii]));
}
D[n][m] = temp;
}
}
for( int n=1 ; n<Rows ; n++ ){
D[n][0] += D[n-1][0];
}
for( int m=1 ; m<Col ; m++ ){
D[0][m] += D[0][m-1];
}
for( int n=1 ; n<Rows ; n++ ){
for( int m=1 ; m<Col ; m++ ){
D[n][m] += mininum1( D[n-1][m], D[n-1][m-1], D[n][m-1] );
}
}
Dist=D[Rows-1][Col-1]; // minimum distance to end
register int n=Rows-1; // now work backwards
register int m=Col-1;
k=1;
while( (n+m) != 0 ){
if( n == 0 ){
m--;
}else if( m == 0 ){
n--;
}else{
A=D[n-1][m];
B=D[n][m-1];
C=D[n-1][m-1];
if( A < B ){
if( A < C ){
n--;
}else{
n--;
m--;
}
}else if( B < C ){
m--;
}else{
n--;
m--;
}
}
k++;
}
return Dist/k;
}
where lookup is this:
float lookUp(int16_t pow){
if( pow < MAX_DIFFERENCE ){
return powLookup[pow];
}else{
return MAX_DIFFERENCE_POW;
}
}
and powLookup is this:
const float powLookup[MAX_DIFFERENCE]={
0,
1,
4,
9,
16,
25,
36,
49,
64,
81,
100,
121,
144,
169,
196,
225,
256,
... and on
The idea is to make it more efficient in terms of micro instruction. You can see I have tried to make it better with a power lookup but it is still killer in processing. Any ideas are welcome. Maybe a faster way of indexing arrays?