Compare commits
1 Commit

Author | SHA1 | Date
---|---|---
cuijinrui | c7faf52cb6 |

@@ -0,0 +1,308 @@
import torch
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import rep_flow_layer_lstm as rf  # representation flow with LSTM attention added

################
#
# Modified https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
# Adds support for B x T x C x H x W video data
#
################

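# Shape flow through the network (a sketch; assumes rf.FlowLayer consumes one
# time step, which forward() below relies on):
#   input                 : (B, T, C, H, W)
#   conv1 .. layer2 run on  (B*T, C', H', W')
#   rep_flow              : (B, C', T, H', W') -> (B, C', T-1, H', W')
#   layer3 .. fc run on the remaining T-1 frames
#   output                : (B, num_classes)
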
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']

model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


# in_planes: input channels, out_planes: output channels,
# kernel_size: kernel size, stride: stride, padding: zero-padding

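# Quick illustrative check (not part of the model): with stride=1 and
# padding=1, conv3x3 preserves the spatial size, e.g.
#   conv3x3(16, 32)(torch.randn(1, 16, 56, 56)).shape == (1, 32, 56, 56)
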
# class Attention(nn.Module):
#     # added attention module
#     def __init__(self, data):
#         super(Attention, self).__init__()
#         self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))  # global pooling: reduce each channel's feature map to a single value
#         self.fc1 = nn.Conv1d(in_channels=data.shape[1], out_channels=32, kernel_size=1, stride=1)  # first conv layer
#         self.relu = nn.ReLU(inplace=True)  # activation
#         self.fc2 = nn.Conv1d(in_channels=32, out_channels=data.shape[1], kernel_size=1, stride=1)  # second conv layer
#         self.sigmoid = nn.Sigmoid()  # activation
#
#     def forward(self, x):
#         out = self.global_pooling(x)
#         out = self.fc1(out)
#         out = self.relu(out)
#         out = self.fc2(out)
#         out = self.sigmoid(out)
#         return out

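# Note on the commented-out module above: global_pooling returns a 4D
# (B, C, 1, 1) tensor, but fc1/fc2 are Conv1d, which expects 3D (B, C, L)
# input; a working variant would use Conv2d (or flatten first) and multiply
# x by the resulting channel weights. It is left commented out as in the
# original.
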
class BasicBlock(nn.Module):  # basic residual block
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)  # first conv layer
        self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d mitigates vanishing/exploding gradients
        self.relu = nn.ReLU(inplace=True)  # activation layer
        self.conv2 = conv3x3(planes, planes)  # second conv layer
        self.bn2 = nn.BatchNorm2d(planes)  # BatchNorm2d
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # forward pass
        residual = x  # the residual (identity) branch
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:  # project the residual so its shape matches the output
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

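# Illustrative example: a stride-2 BasicBlock halves the spatial size, so the
# identity branch needs the 1x1 downsample projection built by _make_layer,
# e.g. BasicBlock(64, 128, stride=2, downsample=...) maps
# (N, 64, 56, 56) -> (N, 128, 28, 28).
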
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)  # 1x1 conv
        self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d mitigates vanishing/exploding gradients
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)  # 3x3 conv
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)  # 1x1 conv
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)  # activation layer
        self.downsample = downsample  # projects the input so the residual matches the block output
        self.stride = stride

    def forward(self, x):  # forward pass
        residual = x  # use the input as the residual

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual  # merge the block output with the residual
        out = self.relu(out)  # apply the activation

        return out

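# Illustrative example: with expansion = 4, Bottleneck(256, 64) squeezes to 64
# channels for the 3x3 conv and expands back, mapping
# (N, 256, 56, 56) -> (N, 256, 56, 56) with no downsample needed.
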
class ResNet(nn.Module):
    # build a residual network
    def __init__(self, block, layers, inp=3, num_classes=150, input_size=112, dropout=0.5):
        self.inplanes = 64
        self.inp = inp  # number of input channels
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(inp, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # max pooling
        # representation flow layer applied after layer2; its channel count must
        # match layer2's output (128 for BasicBlock, 512 for Bottleneck)
        self.rep_flow = rf.FlowLayer(128 * block.expansion)
        self.layer1 = self._make_layer(block, 64, layers[0])  # first residual stage
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)  # second residual stage
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)  # third residual stage
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)  # fourth residual stage

        # probably need to adjust this based on input spatial size
        size = int(math.ceil(input_size / 32))
        self.avgpool = nn.AvgPool2d(size, stride=1)
        self.dropout = nn.Dropout(p=dropout)
        # created by little bear; sized to the pooled feature dimension
        # (512 * expansion) so Bottleneck-based models also work
        self.lstm = nn.LSTM(input_size=512 * block.expansion, num_layers=2,
                            hidden_size=512 * block.expansion, batch_first=True)
        self.lstm_num = 2  # created by little bear
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

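    # Illustrative example: with self.inplanes == 64,
    # _make_layer(BasicBlock, 128, 4, stride=2) builds one stride-2 block with
    # a 1x1 downsample projection followed by three stride-1 blocks, i.e.
    # layer2 of resnet34.
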
    def forward(self, x):
        # x is BxTxCxHxW
        # spatio-temporal video data
        b, t, c, h, w = x.size()
        # need to view it as B*TxCxHxW for the 2D CNN
        # important to keep the batch and time axes next to
        # each other, so a simple view without transposing is possible
        x = x.view(b * t, c, h, w)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(b, t, x.shape[1], x.shape[2], x.shape[3])  # reshape x to pull the time dimension back out
        x = x.transpose(1, 2)
        x = self.rep_flow(x)  # apply the representation flow layer
        x = x.transpose(1, 2)
        # fold the time dimension back into the batch dimension (the flow layer
        # consumes one time step, so only t - 1 frames remain)
        x = x.view(b * (t - 1), x.shape[2], x.shape[3], x.shape[4])
        x = self.layer3(x)
        x = self.layer4(x)

        # print(x.size())

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # initial hidden and cell states, shape (num_layers, batch, hidden_size);
        # created on x's device so the model also runs on GPU
        h0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)
        c0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)
        x = x.view(b, t - 1, x.shape[1])  # back to a (batch, time, features) shape
        x, (_, _) = self.lstm(x, (h0, c0))
        # .contiguous() is required here: the LSTM output is not laid out
        # contiguously in memory, so a plain view would fail
        x = x.contiguous().view(b * (t - 1), x.shape[2])
        x = self.dropout(x)
        # currently making dense, per-frame predictions
        x = self.fc(x)

        # so view as BxTxClass
        x = x.view(b, t - 1, -1)
        # mean-pool over time
        x = torch.mean(x, dim=1)

        # return BxClass prediction
        return x

    def load_state_dict(self, state_dict, strict=True):
        # ignore fc layer
        state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
        md = self.state_dict()
        md.update(state_dict)
        # convert to flow representation
        if self.inp != 3:
            for k, v in md.items():
                if k == 'conv1.weight':
                    if isinstance(v, nn.Parameter):
                        v = v.data
                    # adapt the image CNN to flow input by averaging the RGB
                    # channels and repeating the result self.inp (e.g. 20) times
                    v = torch.mean(v, dim=1).unsqueeze(1).repeat(1, self.inp, 1, 1)
                    md[k] = v

        super(ResNet, self).load_state_dict(md, strict)

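# Illustrative note: resnet34(mode='flow') below builds a 20-channel model;
# load_state_dict above then adapts ImageNet weights by averaging conv1's RGB
# filters and tiling them 20 times, a standard cross-modality initialization
# trick for flow networks.
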
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return model

def resnet34(pretrained=False, mode='rgb', **kwargs):  # uses BasicBlock as the building block
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    if mode == 'flow':
        model = ResNet(BasicBlock, [3, 4, 6, 3], inp=20, **kwargs)
    else:
        model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)

    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    return model

def resnet50(pretrained=False, mode='rgb', **kwargs):  # uses Bottleneck as the building block
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    if mode == 'flow':
        model = ResNet(Bottleneck, [3, 4, 6, 3], inp=20, **kwargs)
    else:
        model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model

def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
    return model

def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
    return model

if __name__ == '__main__':
    # smoke test: resnet50 in flow mode
    d = torch.device('cpu')
    net = resnet50(pretrained=False, mode='flow')
    net.to(d)

    vid = torch.rand((4, 32, 20, 112, 112)).to(d)

    print(net(vid).size())
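
    # A second, hypothetical smoke test for the RGB path (same assumption:
    # rf.FlowLayer returns t - 1 time steps), left commented out:
    # net_rgb = resnet34(pretrained=False, mode='rgb')
    # rgb = torch.rand((2, 16, 3, 112, 112))
    # print(net_rgb(rgb).size())  # expected: torch.Size([2, 150])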