我正在使用英特尔 OpenFL 运行联邦学习实验。我在 Keras 中有 MNIST 的分片描述符,但我想使用 PyTorch。分片描述符的代码是这样的:
@property
def sample_shape(self):
"""Return the sample shape info."""
return ['784']
@property
def target_shape(self):
"""Return the target shape info."""
return ['1']
@property
def dataset_description(self) -> str:
"""Return the dataset description."""
return (f'Mnist dataset, shard number {self.rank}'
f' out of {self.worldsize}')
def download_data(self):
"""Download prepared dataset."""
local_file_path = 'mnist.npz'
mnist_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'
response = requests.get(mnist_url)
with open(local_file_path, 'wb') as f:
f.write(response.content)
with np.load(local_file_path) as f:
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))
如果我在 PyTorch 实验中使用相同的代码,则会出现此错误ValueError: pic should be 2/3 dimensional. Got 1 dimensions.
我试图改变这一点:
@property
def sample_shape(self):
"""Return the sample shape info."""
return ['28', '28', '1']
@property
def target_shape(self):
"""Return the target shape info."""
return return ['28', '28', '1']
@property
def dataset_description(self) -> str:
"""Return the dataset description."""
return (f'Mnist dataset, shard number {self.rank}'
f' out of {self.worldsize}')
def download_data(self):
"""Download prepared dataset."""
local_file_path = 'mnist.npz'
mnist_url = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'
response = requests.get(mnist_url)
with open(local_file_path, 'wb') as f:
f.write(response.content)
with np.load(local_file_path) as f:
x_train, y_train = f['x_train'], f['y_train']
x_test, y_test = f['x_test'], f['y_test']
有了这个解决方案,它似乎工作了,培训就开始了。但是,我获得了奇怪值(超出范围 0-100%)的准确度,所以我试图保持 keras 代码的相同结构。
我的网络是这样的:
class VGG16(nn.Module):
def __init__(self, num_classes):
super(VGG16, self).__init__()
# calculate same padding:
# (w - k + 2*p)/s + 1 = o
# => p = (s(o-1) - w + k)/2
self.block_1 = nn.Sequential(
nn.Conv2d(in_channels=1,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
# (1(32-1)- 32 + 3)/2 = 1
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(in_channels=64,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_2 = nn.Sequential(
nn.Conv2d(in_channels=64,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Conv2d(in_channels=128,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_3 = nn.Sequential(
nn.Conv2d(in_channels=128,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_4 = nn.Sequential(
nn.Conv2d(in_channels=256,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.classifier = nn.Sequential(
nn.Linear(out_features, 4096),
nn.ReLU(True),
nn.Dropout(p=0.65),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(p=0.65),
nn.Linear(4096, num_classes)
)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
# nn.init.xavier_normal_(m.weight)
if m.bias is not None:
m.bias.detach().zero_()
# self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
def forward(self, x):
x = self.block_1(x)
x = self.block_2(x)
x = self.block_3(x)
x = self.block_4(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return F.log_softmax(x)
这是我的转变:
normalize = T.Normalize(
mean=[0.1307],
std=[0.3081]
)
augmentation = T.RandomApply(
[T.RandomHorizontalFlip(),
T.RandomRotation(10)],
p=.8
)
training_transform = T.Compose(
[T.ToTensor(),
T.Resize(28),
augmentation,
normalize]
)
valid_transform = T.Compose(
[T.ToTensor(),
T.Resize(28),
normalize]
)