我正在尝试创建一个序列比对程序,并且正在重新利用我在网上找到的一些代码,但我正在努力找出为什么我的图表有时输出正确,特别是在接近尾声时,因为我似乎总是得到正确的比对分数,但我的图表是如上图所示,总是在这里和那里偏离几个点。(右边是期望的输出,左边是不正确的输出)我怀疑错误存在于替换评分矩阵的某个地方。我意识到有些功能是不必要的,但是因为害怕弄乱我已经拥有的功能而犹豫要更换它们。
#include <iostream>
#include <fstream>
#include <string>
#include <cmath>
#include <algorithm>
#include <time.h>
using namespace std;
// Affine gap function
int gap_affinity(int gap) {
int gap_aff = gap;
return gap_aff;
}
// Get maximal score and trace it
int max_score(int up, int diag, int left, char* ptr) {
int max = 0;
if (diag >= up && diag >= left) {
max = diag;
*ptr = '\\';
}
else if (up > left) {
max = up;
*ptr = '|';
}
else {
max = left;
*ptr = '-';
}
return max;
}
// Initialise scoring matrix with first row and column
void init(int** M, char** M_tb, int A_n, int B_n, int gap) {
M[0][0] = 0;
M_tb[0][0] = 'n';
int gapvert = gap;
int gaphor = gap;
int i = 0, j = 0;
for (j = 1; j <= A_n; j++) {
M[0][j] = -(gaphor);
M_tb[0][j] = '-';
gaphor += 2;
}
for (i = 1; i <= B_n; i++) {
M[i][0] = -(gapvert);
M_tb[i][0] = '|';
gapvert += 2;
}
}
// Needleman and Wunsch algorithm
int alignment(int** M, char** M_tb, string A, string B, string& A_al, string& B_al, int A_n, int B_n, int match, int misMatch, int gap) {
int x = 0, y = 0;
int scU, scD, scL; // scores for respectively cell above, diagonal and left
char ptr, nuc;
int i = 0, j = 0;
// create substitution scoring matrix
const int s[3][3] = { { match, misMatch, misMatch },
{ misMatch, match, misMatch },
{ misMatch, misMatch, match } };
for (i = 1; i <= B_n; i++) {
for (j = 1; j <= A_n; j++) {
nuc = A[j - 1];
switch (nuc)
{
case 'C': x = 0; break;
case 'T': x = 1; break;
case 'A': x = 2; break;
case 'G': x = 3;
}
nuc = B[i - 1];
switch (nuc)
{
case 'C': y = 0; break;
case 'T': y = 1; break;
case 'A': y = 2; break;
case 'G': y = 3;
}
scU = M[i - 1][j] - gap_affinity(gap); // get score if trace would go up
scD = M[i - 1][j - 1] + s[x][y]; // get score if trace would go diagonal
scL = M[i][j - 1] - gap_affinity(gap); // get score if trace would go left
M[i][j] = max_score(scU, scD, scL, &ptr); // get max score for current optimal global alignment
M_tb[i][j] = ptr;
}
}
i--; j--;
while (i > 0 || j > 0) {
switch (M_tb[i][j])
{
case '|': A_al += '-';
B_al += B[i - 1];
i--;
break;
case '\\': A_al += A[j - 1];
B_al += B[i - 1];
i--; j--;
break;
case '-': A_al += A[j - 1];
B_al += '-';
j--;
}
}
reverse(A_al.begin(), A_al.end());
reverse(B_al.begin(), B_al.end());
return 0;
}
// Print the scoring matrix
void print_mtx(int** M, string A, string B, int A_n, int B_n) {
cout << " ";
for (int j = 0; j < A_n; j++) {
cout << A[j] << " ";
}
cout << "\n ";
for (int i = 0; i <= B_n; i++) {
if (i > 0) {
cout << B[i - 1] << " ";
}
for (int j = 0; j <= A_n; j++) {
cout.width(3);
cout << M[i][j] << " ";
}
cout << endl;
}
cout << endl;
}
// Print the traceback matrix
void print_tb(char** M_tb, string A, string B, int A_n, int B_n) {
cout << " ";
for (int j = 0; j < A_n; j++) {
cout << A[j] << " ";
}
cout << "\n ";
for (int i = 0; i <= B_n; i++) {
if (i > 0) {
cout << B[i - 1] << " ";
}
for (int j = 0; j <= A_n; j++) {
cout.width(3);
cout << M_tb[i][j] << " ";
}
cout << endl;
}
cout << endl;
}
// Initiate matrices, align and export
int** NW(string A, string B, string& A_al, string& B_al, int A_n, int B_n, int match, int misMatch, int gap) {
// Create alignment matrix
int** M = new int* [B_n + 1];
for (int i = 0; i <= B_n; i++) {
M[i] = new int[A_n + 1];
}
// Create traceback matrix
char** M_tb = new char* [B_n + 1];
for (int i = 0; i <= B_n; i++) {
M_tb[i] = new char[A_n + 1];
}
clock_t t; // for timing execution
t = clock(); // get time of start
// Initialize traceback and F matrix (fill in first row and column)
init(M, M_tb, A_n, B_n, gap);
// Create alignment
alignment(M, M_tb, A, B, A_al, B_al, A_n, B_n, match, misMatch, gap);
int score = M[B_n][A_n]; // get alignment score
if (true) {
print_mtx(M, A, B, A_n, B_n);
print_tb(M_tb, A, B, A_n, B_n);
}
if (true) {
cout << endl << "Alignments: " << endl;
int start = 0; // start of new line for printing alignments
int cntr = 0; // iterator for printing alignments
int Al_n = A_al.length(); // length of alignment
do {
cout << start + 1 << " A: ";
for (cntr = start; cntr < start + 150; cntr++) {
if (cntr < Al_n) {
cout << A_al[cntr];
}
else {
break;
}
}
cout << " " << endl << start + 1 << " B: ";
for (cntr = start; cntr < start + 150; cntr++) {
if (cntr < Al_n) {
cout << B_al[cntr];
}
else {
break;
}
}
cout << " " << endl << endl;
start += 150;
} while (start <= Al_n);
}
// Show score and runtime
cout << "Alignment score: " << score << endl;
// Free memory
for (int i = 0; i <= B_n; i++) delete M[i];
delete[] M;
for (int i = 0; i <= B_n; i++) delete M_tb[i];
delete[] M_tb;
return 0;
}
int main(int argc, char** argv) {
string A, B; // sequence to be aligned A and B
string A_al, B_al = ""; // aligned sequence A and B
int match = 5; // match
int misMatch = -3; // mismatch
int gap = 2; // initial gap penalty. Gap penalty is lower than mismatch: two sequences from same species assumed.
string line; // reading in data
// User interface
cout << "==============================" << endl;
cout << "Enter the first string ";
getline(cin, line);
A = line;
cout << "Enter the second string ";
getline(cin, line);
B = line;
cout << "Enter score for matching. ";
cin >> match;
cout << "Enter score for mismatch. ";
cin >> misMatch;
cout << "Enter gap pentalty. ";
cin >> gap;
// Read in the alignments
// Run the alignment script
int A_n = A.length();
int B_n = B.length();
NW(A, B, A_al, B_al, A_n, B_n, match, misMatch, gap);
// Output parameters
cout << endl << "Used parameters:" << endl;
cout << "Match = " << match << endl;
cout << "Mismatch = " << misMatch << endl;
cout << "Gap = -" << gap << endl;
return 0;
}