From c7faf52cb62fa02d06debbe040c803db41279101 Mon Sep 17 00:00:00 2001
From: cuijinrui <2842410839@qq.com>
Date: Thu, 8 Oct 2020 23:17:57 +0800
Subject: [PATCH] resnet50

---
 baseline_2d_resnets.py | 308 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 308 insertions(+)
 create mode 100644 baseline_2d_resnets.py

diff --git a/baseline_2d_resnets.py b/baseline_2d_resnets.py
new file mode 100644
index 0000000000..e891ca9bba
--- /dev/null
+++ b/baseline_2d_resnets.py
@@ -0,0 +1,308 @@
+import torch
+import torch.nn as nn
+import math
+import torch.utils.model_zoo as model_zoo
+import rep_flow_layer_lstm as rf  # representation flow layer with LSTM attention
+
+################
+#
+# Modified https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+# Adds support for B x T x C x H x W video data
+#
+################
+
+
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152']
+
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """3x3 convolution with padding"""
+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+                     padding=1, bias=False)
+
+
+# in_planes: input channels, out_planes: output channels, kernel_size: kernel size,
+# stride: convolution stride, padding: zero padding
+
+# class Attention(nn.Module):
+#     # added attention module
+#     def __init__(self, data):
+#         super(Attention, self).__init__()
+#         self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))  # global pooling: each channel of the feature map is reduced to a single value
+#         self.fc1 = nn.Conv1d(in_channels=data.shape[1], out_channels=32, kernel_size=1, stride=1)  # first conv layer
+#         self.relu = nn.ReLU(inplace=True)  # activation
+#         self.fc2 = nn.Conv1d(in_channels=32, out_channels=data.shape[1], kernel_size=1, stride=1)  # second conv layer
+#         self.sigmoid = nn.Sigmoid()  # activation
+#
+#     def forward(self, x):
+#         out = self.global_pooling(x)
+#         out = self.fc1(out)
+#         out = self.relu(out)
+#         out = self.fc2(out)
+#         out = self.sigmoid(out)
+#         return out
+
+
+class BasicBlock(nn.Module):  # basic building block of the residual network
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(inplanes, planes, stride)  # first convolution layer
+        self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d to mitigate vanishing/exploding gradients
+        self.relu = nn.ReLU(inplace=True)  # activation layer
+        self.conv2 = conv3x3(planes, planes)  # second convolution layer
+        self.bn2 = nn.BatchNorm2d(planes)  # BatchNorm2d
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        # forward pass of the block
+        residual = x  # the residual (shortcut) branch
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+
+        if self.downsample is not None:  # project the residual if needed
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class Bottleneck(nn.Module):
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(Bottleneck, self).__init__()
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)  # 1x1 convolution
+        self.bn1 = nn.BatchNorm2d(planes)  # BatchNorm2d to mitigate vanishing/exploding gradients
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+                               padding=1, bias=False)  # 3x3 convolution
+        self.bn2 = nn.BatchNorm2d(planes)  # BatchNorm2d to mitigate vanishing/exploding gradients
+        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)  # 1x1 convolution
+        self.bn3 = nn.BatchNorm2d(planes * self.expansion)  # BatchNorm2d to mitigate vanishing/exploding gradients
+        self.relu = nn.ReLU(inplace=True)  # activation layer
+        self.downsample = downsample  # optional transform of the input so the residual matches the block output
+        self.stride = stride
+
+    def forward(self, x):  # forward pass of the block
+        residual = x  # use the input as the residual
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        if self.downsample is not None:
+            residual = self.downsample(x)
+
+        out += residual  # merge the block output with the residual
+        out = self.relu(out)  # apply the activation
+
+        return out
+
+
+class ResNet(nn.Module):
+    # a residual network over video frames
+    def __init__(self, block, layers, inp=3, num_classes=150, input_size=112, dropout=0.5):
+        self.inplanes = 64
+        self.inp = inp  # number of input channels
+        super(ResNet, self).__init__()
+        self.conv1 = nn.Conv2d(inp, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # max pooling
+        # representation-flow layer inserted after layer2, which outputs 128 * block.expansion channels
+        self.rep_flow = rf.FlowLayer(128 * block.expansion)
+        self.layer1 = self._make_layer(block, 64, layers[0])  # first residual stage
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)  # second residual stage
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)  # third residual stage
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)  # fourth residual stage
+
+        # probably need to adjust this based on input spatial size
+        size = int(math.ceil(input_size / 32))
+        self.avgpool = nn.AvgPool2d(size, stride=1)
+        self.dropout = nn.Dropout(p=dropout)
+        # the pooled features have 512 * block.expansion channels, so size the LSTM to match
+        self.lstm = nn.LSTM(input_size=512 * block.expansion, hidden_size=512 * block.expansion,
+                            num_layers=2, batch_first=True)  # create by little bear
+        self.lstm_num = 2  # number of LSTM layers (create by little bear)
+        self.fc = nn.Linear(512 * block.expansion, num_classes)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample))
+        self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        # x is BxTxCxHxW
+        # spatio-temporal video data
+        b, t, c, h, w = x.size()
+        # need to view it as B*TxCxHxW for the 2D CNN
+        # important to keep the batch and time axes next to
+        # each other, so a simple view without transposing is possible
+        x = x.view(b * t, c, h, w)
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = x.view(b, t, x.shape[1], x.shape[2], x.shape[3])  # reshape x to recover the time dimension
+        x = x.transpose(1, 2)
+        x = self.rep_flow(x)  # representation-flow layer
+        x = x.transpose(1, 2)
+        # fold time back into the batch dimension (the flow layer consumes one time step, leaving t - 1);
+        # .contiguous() is required because the transpose above makes x non-contiguous
+        x = x.contiguous().view(b * (t - 1), x.shape[2], x.shape[3], x.shape[4])
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        # print(x.size())
+
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)
+        # initial LSTM states, shaped (num_layers, batch, hidden_size), on the same device as x
+        h0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)
+        c0 = torch.randn(self.lstm_num, b, x.shape[1], device=x.device)
+        x = x.view(b, t - 1, x.shape[1])  # back to a (batch, time, features) layout
+        x, (_, _) = self.lstm(x, (h0, c0))
+        x = x.contiguous().view(b * (t - 1), x.shape[2])  # the LSTM output is not contiguous, so view() needs .contiguous() first
+        x = self.dropout(x)
+        # currently making dense, per-frame predictions
+        x = self.fc(x)
+
+        # so view as BxTxClass
+        x = x.view(b, t - 1, -1)
+        # mean-pool over time
+        x = torch.mean(x, dim=1)
+
+        # return BxClass prediction
+        return x
+
+    def load_state_dict(self, state_dict, strict=True):
+        # ignore fc layer
+        state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
+        md = self.state_dict()
+        md.update(state_dict)
+        # convert to flow representation
+        if self.inp != 3:
+            for k, v in md.items():
+                if k == 'conv1.weight':
+                    if isinstance(v, nn.Parameter):
+                        v = v.data
+                    # change image CNN to 20-channel flow by averaging the RGB channels and repeating 20 times
+                    v = torch.mean(v, dim=1).unsqueeze(1).repeat(1, self.inp, 1, 1)
+                    md[k] = v
+
+        super(ResNet, self).load_state_dict(md, strict)
+
+
+def resnet18(pretrained=False, **kwargs):
+    """Constructs a ResNet-18 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
+    return model
+
+
+def resnet34(pretrained=False, mode='rgb', **kwargs):  # uses BasicBlock as the building block
+    """Constructs a ResNet-34 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    if mode == 'flow':
+        model = ResNet(BasicBlock, [3, 4, 6, 3], inp=20, **kwargs)
+    else:
+        model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
+    return model
+
+
+def resnet50(pretrained=False, mode='rgb', **kwargs):  # uses Bottleneck as the building block
+    """Constructs a ResNet-50 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    if mode == 'flow':
+        model = ResNet(Bottleneck, [3, 4, 6, 3], inp=20, **kwargs)
+    else:
+        model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
+    return model
+
+
+def resnet101(pretrained=False, **kwargs):
+    """Constructs a ResNet-101 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
+    return model
+
+
+def resnet152(pretrained=False, **kwargs):
+    """Constructs a ResNet-152 model.
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+    """
+    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
+    return model
+
+
+if __name__ == '__main__':
+    # test resnet 50
+    import torch
+
+    d = torch.device('cpu')
+    net = resnet50(pretrained=False, mode='flow')
+    net.to(d)
+
+    vid = torch.rand((4, 32, 20, 112, 112)).to(d)
+
+    print(net(vid).size())
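
baseline_2d_resnets.py imports rep_flow_layer_lstm, which is not part of this patch. The model only
assumes that rf.FlowLayer(channels) accepts a B x C x T x H x W feature tensor and returns a tensor
with the same channel count and one fewer time step (B x C x (T-1) x H x W). For shape-checking the
rest of the file without the real layer, a minimal stand-in with that assumed interface could look
like the sketch below; the constructor argument and the frame-differencing body are placeholders,
not the actual LSTM-attention representation-flow implementation.

    # rep_flow_layer_lstm.py -- hypothetical stub for shape testing only;
    # the real module computes representation flow with LSTM attention.
    import torch.nn as nn


    class FlowLayer(nn.Module):
        def __init__(self, channels):
            super(FlowLayer, self).__init__()
            # 1x1x1 conv keeps the channel count unchanged; the real layer estimates flow here
            self.proj = nn.Conv3d(channels, channels, kernel_size=1, bias=False)

        def forward(self, x):
            # x: B x C x T x H x W -> B x C x (T-1) x H x W
            # temporal differences of consecutive frames stand in for the flow computation
            return self.proj(x[:, :, 1:] - x[:, :, :-1])

With such a stub on the import path, the __main__ block at the end of baseline_2d_resnets.py should
run and print torch.Size([4, 150]) for the default num_classes=150.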