This commit is contained in:
cuijinrui 2020-10-08 23:17:57 +08:00
parent f34a65a2b0
commit c7faf52cb6
1 changed file with 308 additions and 0 deletions

baseline_2d_resnets.py Normal file

@@ -0,0 +1,308 @@
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import rep_flow_layer_lstm as rf  # representation flow with an LSTM attention module added
################
#
# Modified https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
# Adds support for B x T x C x H x W video data
#
################
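#
# Rough shape flow through the network below (a summary of ResNet.forward):
#   input clip:            B x T x C x H x W
#   reshaped for 2D convs: (B*T) x C x H x W
#   after layer2:          reshaped to B x C' x T x H' x W' and passed through the
#                          representation-flow layer, which consumes one time step (T -> T-1)
#   after layer4/avgpool:  (B*(T-1)) x F, then run as a B x (T-1) x F sequence through the LSTM
#   output:                B x num_classes, after mean-pooling the per-frame predictions over time
#
################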
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
# in_planes: input channels of the conv, out_planes: output channels, kernel_size: kernel size, stride: stride, padding: padding
# class Attention(nn.Module):
# # added attention module
# def __init__(self, data):
# super(Attention, self).__init__()
# self.global_pooling = nn.AdaptiveAvgPool2d((1, 1)) # global pooling: the whole feature map is pooled so each channel ends up as a single value
# self.fc1 = nn.Conv1d(in_channels=data.shape[1], out_channels=32, kernel_size=1, stride=1) # first conv layer
# self.relu = nn.ReLU(inplace=True) # activation
# self.fc2 = nn.Conv1d(in_channels=32, out_channels=data.shape[1], kernel_size=1, stride=1) # second conv layer
# self.sigmoid = nn.Sigmoid() # activation
#
# def forward(self, x):
# out = self.global_pooling(x)
# out = self.fc1(out)
# out = self.relu(out)
# out = self.fc2(out)
# out = self.sigmoid(out)
# return out
class BasicBlock(nn.Module):  # basic block of the residual network
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)  # first conv layer
self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d, mitigates vanishing/exploding gradients
self.relu = nn.ReLU(inplace=True)  # activation layer
self.conv2 = conv3x3(planes, planes)  # second conv layer
self.bn2 = nn.BatchNorm2d(planes)  # BatchNorm2d
self.downsample = downsample
self.stride = stride
def forward(self, x):
# forward pass of the block
residual = x  # the shortcut (residual) branch
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:  # transform the shortcut so it matches the block output
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)  # 1x1 conv
self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d, mitigates vanishing/exploding gradients
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)  # 3x3 conv
self.bn2 = nn.BatchNorm2d(planes)  # BatchNorm2d
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)  # 1x1 conv
self.bn3 = nn.BatchNorm2d(planes * self.expansion)  # BatchNorm2d
self.relu = nn.ReLU(inplace=True)  # activation layer
self.downsample = downsample  # transforms the input so the shortcut matches the block output
self.stride = stride
def forward(self, x):  # forward pass of the block
residual = x  # use the input as the shortcut
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual  # add the shortcut to the block output
out = self.relu(out)  # apply the activation
return out
class ResNet(nn.Module):
# build a residual network
def __init__(self, block, layers, inp=3, num_classes=150, input_size=112, dropout=0.5):
self.inplanes = 64
self.inp = inp  # number of input channels
super(ResNet, self).__init__()
self.conv1 = nn.Conv2d(inp, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # max pooling
self.rep_flow = rf.FlowLayer(128)  # the newly added representation-flow layer (rep_flow), 128 channels
self.layer1 = self._make_layer(block, 64, layers[0])  # first residual stage
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)  # second residual stage
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)  # third residual stage
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)  # fourth residual stage
# probably need to adjust this based on input spatial size
size = int(math.ceil(input_size / 32))
self.avgpool = nn.AvgPool2d(size, stride=1)
self.dropout = nn.Dropout(p=dropout)
self.lstm = nn.LSTM(input_size=512, num_layers=2, hidden_size=512, batch_first=True)  # LSTM over the time dimension (added by little bear)
self.lstm_num = 2  # number of LSTM layers (added by little bear)
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
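# the identity shortcut cannot be added directly when the stride or channel count changes;
# e.g. layer2 of resnet34 goes from 64 to 128 channels at stride 2, so the shortcut is
# projected with a 1x1 strided conv + BatchNorm to match the block output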
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
# x is BxTxCxHxW
# spatio-temporal video data
b, t, c, h, w = x.size()
# need to view it is B*TxCxHxW for 2D CNN
# important to keep the batch and time axes next to
# each other, so a simple view without transposing is possible
x = x.view(b * t, c, h, w)
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = x.view(b, t, x.shape[1], x.shape[2], x.shape[3])  # reshape x to pull the time dimension back out
x = x.transpose(1, 2)
x = self.rep_flow(x)  # apply the added representation-flow layer
x = x.transpose(1, 2)
x = x.view(b * (t - 1), x.shape[2], x.shape[3], x.shape[4])  # fold the time dimension back into the batch dimension (the flow layer consumes one time step, so T becomes T-1)
x = self.layer3(x)
x = self.layer4(x)
# print(x.size())
x = self.avgpool(x)
x = x.view(x.size(0), -1)
h0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)  # (num_layers, batch, hidden_size); created on the same device as x
c0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)
x = x.view(b, t - 1, x.shape[1])  # back to a (batch, time, features) sequence for the LSTM
x, (_, _) = self.lstm(x, (h0, c0))
x = x.contiguous().view(b * (t - 1), x.shape[2])  # view() failed here without .contiguous(), presumably because x is not contiguous in memory
x = self.dropout(x)
# currently making dense, per-frame predictions
x = self.fc(x)
# so view as BxTxClass
x = x.view(b, t - 1, -1)
# mean-pool over time
x = torch.mean(x, dim=1)
# return BxClass prediction
return x
def load_state_dict(self, state_dict, strict=True):
# ignore fc layer
state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
md = self.state_dict()
md.update(state_dict)
# convert to flow representation
if self.inp != 3:
for k, v in md.items():
if k == 'conv1.weight':
if isinstance(v, nn.Parameter):
v = v.data
# change the image CNN into an inp-channel (e.g. 20-channel) flow CNN by averaging the RGB channels of conv1 and repeating the result inp times
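# e.g. an ImageNet conv1.weight of shape (64, 3, 7, 7) becomes (64, 1, 7, 7) after the mean and (64, inp, 7, 7) after the repeat (assuming the standard torchvision ResNet stem)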
v = torch.mean(v, dim=1).unsqueeze(1).repeat(1, self.inp, 1, 1)
md[k] = v
super(ResNet, self).load_state_dict(md, strict)
def resnet18(pretrained=False, **kwargs):
"""Constructs a ResNet-18 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
return model
def resnet34(pretrained=False, mode='rgb', **kwargs):  # uses BasicBlock as the basic unit
"""Constructs a ResNet-34 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
if mode == 'flow':
model = ResNet(BasicBlock, [3, 4, 6, 3], inp=20, **kwargs)
else:
model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
return model
def resnet50(pretrained=False, mode='rgb', **kwargs):  # uses Bottleneck as the basic unit of the resnet
"""Constructs a ResNet-50 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
if mode == 'flow':
model = ResNet(Bottleneck, [3, 4, 6, 3], inp=20, **kwargs)
else:
model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
return model
def resnet101(pretrained=False, **kwargs):
"""Constructs a ResNet-101 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
return model
def resnet152(pretrained=False, **kwargs):
"""Constructs a ResNet-152 model.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
"""
model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
if pretrained:
model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
return model
if __name__ == '__main__':
# test resnet50 (torch is already imported at the top of the file)
d = torch.device('cpu')
net = resnet50(pretrained=False, mode='flow')
net.to(d)
vid = torch.rand((4, 32, 20, 112, 112)).to(d)
print(net(vid).size())
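# additional sketch (assumption: rf.FlowLayer(128) expects the 128-channel feature map
# that the BasicBlock-based layer2 produces): run the resnet34 variant on a short RGB clip
net34 = resnet34(pretrained=False, mode='rgb')
net34.to(d)
clip = torch.rand((2, 16, 3, 112, 112)).to(d)  # B x T x C x H x W
print(net34(clip).size())  # expected: torch.Size([2, 150]) with the default num_classes=150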