0

我正在尝试创建一个模拟蛋白质的 GNN。但是,我遇到了一个错误GraphConv(我得到了同样的错误GCNConv)。我不明白为什么当形状应该能够相乘时会出现此错误。我认为这个错误一定与我创建的自定义数据集有关,但我不能 100% 确定。如果您有类似的问题或知道如何解决此问题,请告诉我。谢谢你。

编辑:即使我更改embedding_size1479,我仍然得到:RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479)

自定义数据集:

class ProteinDataset(geom_data.Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        # root = where data set is stored
        super(ProteinDataset, self).__init__(root, transform, pre_transform)
        self.root = root

    @property
    def raw_file_names(self):

        return os.listdir(f'{self.root}/raw')

    @property
    def processed_file_names(self):

        inxs = []

        for pdb in self.raw_paths:
            inxs.append(pdb.split('/')[-1].split('.p')[0])

        return [f'{i}.pt' for i in inxs]

    def download(self):
        pass

    def process(self):

        for pdb in self.raw_paths:

            try:
                mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
            except AttributeError:
                os.remove(pdb)
                continue

            # Get node features
            node_feats = self._get_node_features(mol_obj).reshape([-1,1])

            # Get edge features
            edge_feats = self._get_edge_features(mol_obj).reshape([-1,1])
            # Get adjacency info
            edge_index = self._get_adjacency_info(mol_obj)

            label = self._get_labels(pdb)

            # Create Data object
            data = geom_data.Data(x=node_feats,
                                  edge_index=edge_index,
                                  edge_attr=edge_feats,
                                  y=label)

            i = pdb.split('/')[-1].split('.p')[0]

            torch.save(data, os.path.join(self.processed_dir,f'{i}.pt'))

    def _get_node_features(self, mol):

        all_node_feats = []


        for atom in mol.GetAtoms():
            all_node_feats.append(atom.GetMass())

        all_node_feats = np.asarray(all_node_feats)
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, mol):

        all_edge_feats = []

        dists = Chem.rdmolops.Get3DDistanceMatrix(mol)

        # CA-CA Distances
        for bond in mol.GetBonds():

            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()

            all_edge_feats.append(dists[begin,end])

        all_edge_feats = np.asarray(all_edge_feats)
        return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, mol):

        adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
        row, col = np.where(adj_matrix)
        coo = np.array(list(zip(row, col)))
        coo = np.reshape(coo, (2, -1))
        return torch.tensor(coo, dtype=torch.long)

    def _get_labels(self, fn):

        with open(fn, 'r') as f:
            label = float(f.readline())
            f.close()

        label = np.asarray([label])
        return torch.tensor(label, dtype=torch.float)

    def len(self):
        return len(self.raw_paths)

    def get(self, inx):

        data = torch.load(self.processed_paths[inx])

        return data

模型:

class GNN(torch.nn.Module):
    def __init__(self, feature_size):
        super(GNN, self).__init__()

        embedding_size = 1024

        # GNN Layers

        self.conv1 = GraphConv(feature_size, embedding_size)
        self.head1 = Linear(embedding_size*3, embedding_size)
        self.pool1 = TopKPooling(embedding_size, ratio=0.8)

        self.conv2 = GraphConv(embedding_size, embedding_size)
        self.head2 = Linear(embedding_size*3, embedding_size)
        self.pool2 = TopKPooling(embedding_size, ratio=0.5)

        self.conv3 = GraphConv(embedding_size, embedding_size)
        self.head3 = Linear(embedding_size*3, embedding_size)
        self.pool3 = TopKPooling(embedding_size, ratio=0.2)

        # Linear Layers

        self.fc1 = Linear(embedding_size*2, 1024)
        self.fc2 = Linear(1024, 128)
        self.fc3 = Linear(128, 1)

    def forward(self, x, edge_attr, edge_index, batch_index):

        # First block
        x = self.conv1(x, edge_index).relu()
        x = self.head1(x)

        x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)

        x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)

        # Second block
        x = self.conv2(x, edge_index).relu()
        x = self.head2(x)

        x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)

        x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)

        # Third block
        x = self.conv3(x, edge_index).relu()
        x = self.head3(x)

        x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)

        x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)

        # Concat pooled vectors
        x = x1 + x2 + x3

        # Apply Linear Layers
        x = self.fc1(x).relu()
        x = self.fc2(x).relu()
        x = self.fc3(x)

        return x

训练:

device = torch.device('cuda')

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])

# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)

# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)

# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

def train(m,opt):
    loss_sum = 0.0
    for _, batch in enumerate(train_loader):
        # Use GPU
        batch.to(device)
        # Reset grad
        opt.zero_grad()
        # Pass node features and connections
        pred = m(batch.x.float(),
                 batch.edge_attr.float(),
                 batch.edge_index,
                 batch.batch)
        # Calculate loss and gradients
        loss = loss_fn(pred, batch.y)
        loss.backward()
        loss_sum += loss.item()
        # Update using the gradients
        opt.step()
    return loss_sum / len(train_loader)

def validate(m):
    loss_sum = 0.0
    for _, batch in enumerate(test_loader):
    for _, batch in enumerate(test_loader):
        # Use GPU
        batch.to(device)
        # No grad
        with torch.no_grad():
            pred = m(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
        # Calculate loss and gradients
        loss = loss_fn(pred, batch.y)
        loss_sum += loss.item()

    return loss_sum / len(test_loader)

model.zero_grad()
optimizer.zero_grad()

# Loop for training
for i in range(101):
    loss = train(model,optimizer)
    if (i%10==0):
        loss_v = validate(model)
        print(i, loss, loss_v)
    else:
        print(i, loss)

运行训练时出错:

Traceback (most recent call last):
  File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
    loss = train(model,optimizer)
  File "/home/spencer/sh3/gnn/./train.py", line 44, in train
    pred = m(batch.x.float(),
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
    x = self.conv1(x, edge_index).relu()
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
    out = self.lin_rel(out)
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
    return F.linear(x, self.weight, self.bias)
  File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
    return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
4

1 回答 1

0

该错误告诉您输入形状不匹配。

您可以像这样在 forward 方法中重塑输入:x = x.view(1, 1479)但请确保这是您需要的 - 此错误通常表示数据集形状错误或传递了错误的输入

于 2022-01-25T06:59:01.140 回答