resnet50

2020-10-08 23:17:57 +08:00
1 changed files with 308 additions and 0 deletions
--- a/baseline_2d_resnets.py
+++ b/baseline_2d_resnets.py
@@ -0,0 +1,308 @@
+import torch
+import torch.nn as nn
+import math
+import torch.utils.model_zoo as model_zoo
+import rep_flow_layer_lstm as rf # 采用加入了lstm注意力的表示流
+
+################
+#
+# Modified https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
+# Adds support for B x T x C x H x W video data
+#
+################
+
+
+# Names exported by a star-import of this module.
+__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
+           'resnet152']
+
+# Checkpoint URL for each architecture name. The resnet* factory functions in
+# this file pass the matching entry to model_zoo.load_url when pretrained=True.
+model_urls = {
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """Build a bias-free 3x3 ``nn.Conv2d`` with one pixel of padding.
+
+    Args:
+        in_planes: number of input channels.
+        out_planes: number of output channels.
+        stride: convolution stride (default 1).
+
+    Returns:
+        The freshly constructed ``nn.Conv2d`` module.
+    """
+    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
+    return nn.Conv2d(in_planes, out_planes, **conv_options)
+
+
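+# in_planes: input channels of the convolution; out_planes: output channels; kernel_size: size of the convolution kernel; stride: convolution stride; padding: padding added around the input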
+
+# class Attention(nn.Module):
+#     # Added attention module
+#     def __init__(self, data):
+#         super(Attention, self).__init__()
+#         self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))  # global pooling: pools the whole feature map so each channel is reduced to a single value
+#         self.fc1 = nn.Conv1d(in_channels=data.shape[1], out_channels=32, kernel_size=1, stride=1)  # first convolution layer
+#         self.relu = nn.ReLU(inplace=True)  # activation
+#         self.fc2 = nn.Conv1d(in_channels=32, out_channels=data.shape[1], kernel_size=1, stride=1)  # second convolution layer
+#         self.sigmoid = nn.Sigmoid()  # activation
+#
+#     def forward(self, x):
+#         out = self.global_pooling(x)
+#         out = self.fc1(out)
+#         out = self.relu(out)
+#         out = self.fc2(out)
+#         out = self.sigmoid(out)
+#         return out
+
+
+class BasicBlock(nn.Module):
+    """Residual block made of two 3x3 convolutions (channel expansion 1).
+
+    Args:
+        inplanes: channels of the incoming feature map.
+        planes: channels produced by both convolutions.
+        stride: stride of the first convolution (default 1).
+        downsample: optional module applied to the input to build the
+            shortcut; when ``None`` the input itself is the shortcut.
+    """
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super().__init__()
+        # Only the first convolution receives the stride.
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = self.relu(self.bn1(self.conv1(x)))
+        y = self.bn2(self.conv2(y))
+
+        # The shortcut is the raw input unless a downsample module was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+
+        y += shortcut
+        return self.relu(y)
+
+
+class Bottleneck(nn.Module):
+    """Residual block of 1x1 -> 3x3 -> 1x1 convolutions (channel expansion 4).
+
+    Args:
+        inplanes: channels of the incoming feature map.
+        planes: channels of the two inner convolutions; the block outputs
+            ``planes * expansion`` channels.
+        stride: stride of the 3x3 convolution (default 1).
+        downsample: optional module applied to the input to build the
+            shortcut; when ``None`` the input itself is the shortcut.
+    """
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super().__init__()
+        widened = planes * self.expansion
+        # 1x1 convolution from inplanes to planes channels
+        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes)
+        # 3x3 convolution; the only layer of the block that receives the stride
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+                               padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(planes)
+        # 1x1 convolution up to planes * expansion channels
+        self.conv3 = nn.Conv2d(planes, widened, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(widened)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        """Return ``relu(main_path(x) + shortcut(x))``."""
+        y = self.relu(self.bn1(self.conv1(x)))
+        y = self.relu(self.bn2(self.conv2(y)))
+        y = self.bn3(self.conv3(y))
+
+        # The shortcut is the raw input unless a downsample module was supplied.
+        shortcut = x if self.downsample is None else self.downsample(x)
+
+        y += shortcut
+        return self.relu(y)
+
+
+class ResNet(nn.Module):
+    """2D ResNet over B x T x C x H x W clips with a flow layer and an LSTM head.
+
+    Frames are folded into the batch axis for the 2D convolutions. After
+    ``layer2`` the time axis is restored and the features go through
+    ``rf.FlowLayer``. ``layer3``/``layer4`` then run per time step, the pooled
+    per-step features pass through a 2-layer LSTM, dropout and ``fc``, and the
+    per-step class scores are averaged over time.
+
+    Args:
+        block: residual block class (``BasicBlock`` or ``Bottleneck``); its
+            ``expansion`` attribute scales every channel width used here.
+        layers: number of blocks in each of the four residual stages.
+        inp: number of channels of each input frame.
+        num_classes: size of the classifier output.
+        input_size: accepted for backward compatibility only; pooling is
+            global, so the spatial input size does not need to be declared.
+        dropout: dropout probability applied before ``fc``.
+    """
+
+    def __init__(self, block, layers, inp=3, num_classes=150, input_size=112, dropout=0.5):
+        super(ResNet, self).__init__()
+        self.inplanes = 64
+        self.inp = inp  # channels of each input frame
+        self.conv1 = nn.Conv2d(inp, 64, kernel_size=7, stride=2, padding=3,
+                               bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        # The flow layer sits after layer2, which emits 128 * expansion
+        # channels (128 for BasicBlock, 512 for Bottleneck). A fixed 128 only
+        # matched BasicBlock.
+        # NOTE(review): assumes FlowLayer's first argument is its channel count.
+        self.rep_flow = rf.FlowLayer(128 * block.expansion)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
+
+        # Width of the features leaving layer4; LSTM and fc are sized from it
+        # so that Bottleneck variants (expansion 4) are wired consistently.
+        feat_dim = 512 * block.expansion
+        # Global average pooling: equal to the former fixed-size AvgPool2d when
+        # the feature map matches input_size, and still valid when it does not.
+        self.avgpool = nn.AdaptiveAvgPool2d(1)
+        self.dropout = nn.Dropout(p=dropout)
+        self.lstm_num = 2  # number of stacked LSTM layers
+        self.lstm = nn.LSTM(input_size=feat_dim, num_layers=self.lstm_num,
+                            hidden_size=feat_dim, batch_first=True)
+        self.fc = nn.Linear(feat_dim, num_classes)
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1):
+        """Stack ``blocks`` residual blocks; only the first one may be strided.
+
+        A 1x1 conv + batch-norm shortcut is created for the first block when
+        the stride or the channel count changes. ``self.inplanes`` is advanced
+        to the output width of the stage.
+        """
+        downsample = None
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                nn.Conv2d(self.inplanes, planes * block.expansion,
+                          kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(planes * block.expansion),
+            )
+
+        stage = [block(self.inplanes, planes, stride, downsample)]
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            stage.append(block(self.inplanes, planes))
+
+        return nn.Sequential(*stage)
+
+    def _initial_lstm_state(self, batch, ref):
+        """Return the ``(h0, c0)`` pair for the LSTM, or ``None`` in eval mode.
+
+        Training keeps the random initial state of the original code, but
+        creates it on the device/dtype of ``ref`` so the model also runs on
+        GPU. In eval mode ``None`` is returned, which makes ``nn.LSTM`` start
+        from zeros and keeps inference deterministic.
+        """
+        if not self.training:
+            return None
+        shape = (self.lstm_num, batch, self.lstm.hidden_size)
+        h0 = torch.randn(shape, device=ref.device, dtype=ref.dtype)
+        c0 = torch.randn(shape, device=ref.device, dtype=ref.dtype)
+        return h0, c0
+
+    def forward(self, x):
+        """Classify a clip batch.
+
+        Args:
+            x: tensor of shape B x T x C x H x W.
+
+        Returns:
+            Tensor of shape B x num_classes (per-step scores averaged over time).
+        """
+        b, t, c, h, w = x.size()
+        # Fold time into the batch axis for the 2D CNN. reshape (not view) so
+        # that non-contiguous inputs are accepted as well.
+        x = x.reshape(b * t, c, h, w)
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        # Restore the time axis and move channels first: B x C x T x H x W.
+        x = x.view(b, t, x.shape[1], x.shape[2], x.shape[3])
+        x = x.transpose(1, 2)
+        x = self.rep_flow(x)
+        x = x.transpose(1, 2)
+        # The original code assumed the flow layer returns T - 1 steps; read
+        # the actual length from the tensor instead of hard-coding it.
+        t_out = x.shape[1]
+        # The transposed tensor is generally non-contiguous, so view() is not
+        # guaranteed to work here; reshape copies only when required.
+        x = x.reshape(b * t_out, x.shape[2], x.shape[3], x.shape[4])
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = x.view(b, t_out, -1)  # B x T' x features
+        x, _ = self.lstm(x, self._initial_lstm_state(b, x))
+        x = x.reshape(b * t_out, -1)
+        x = self.dropout(x)
+        # dense, per-step predictions
+        x = self.fc(x)
+
+        # view as B x T' x classes and mean-pool over time
+        x = x.view(b, t_out, -1)
+        return torch.mean(x, dim=1)
+
+    def load_state_dict(self, state_dict, strict=True):
+        """Load a checkpoint, adapting ImageNet weights where shapes differ.
+
+        * Entries whose key contains ``'fc'`` are skipped only when they are
+          missing from this model or have a different shape (e.g. a
+          1000-class ImageNet head). A checkpoint saved from this model keeps
+          its trained classifier.
+        * ``conv1.weight`` is averaged over its input channels and repeated
+          ``self.inp`` times only when its channel count differs from
+          ``self.inp``; weights that already match are loaded untouched.
+
+        Keys absent from ``state_dict`` keep the model's current values.
+        Returns whatever ``nn.Module.load_state_dict`` returns.
+        """
+        own = self.state_dict()
+        merged = dict(own)
+        for key, value in state_dict.items():
+            if 'fc' in key and (key not in own or own[key].shape != value.shape):
+                continue
+            merged[key] = value
+
+        weight = merged.get('conv1.weight')
+        if weight is not None and weight.shape[1] != self.inp:
+            if isinstance(weight, nn.Parameter):
+                weight = weight.data
+            # e.g. turn a 3-channel RGB filter bank into a 20-channel flow one
+            merged['conv1.weight'] = torch.mean(weight, dim=1).unsqueeze(1).repeat(1, self.inp, 1, 1)
+
+        return super(ResNet, self).load_state_dict(merged, strict)
+
+
+def resnet18(pretrained=False, mode='rgb', **kwargs):
+    """Constructs a ResNet-18 model (BasicBlock, stages of 2/2/2/2 blocks).
+
+    Args:
+        pretrained (bool): If True, downloads the 'resnet18' checkpoint from
+            ``model_urls`` and passes it to the model's ``load_state_dict``.
+        mode (str): ``'flow'`` builds the network for 20-channel input frames
+            (``inp=20``), matching ``resnet34``/``resnet50``; any other value
+            leaves ``inp`` to ``kwargs`` / the ``ResNet`` default.
+        **kwargs: forwarded to ``ResNet``.
+    """
+    if mode == 'flow':
+        model = ResNet(BasicBlock, [2, 2, 2, 2], inp=20, **kwargs)
+    else:
+        model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
+    return model
+
+
+def resnet34(pretrained=False, mode='rgb', **kwargs):
+    """Constructs a ResNet-34 model (BasicBlock, stages of 3/4/6/3 blocks).
+
+    Args:
+        pretrained (bool): If True, downloads the 'resnet34' checkpoint from
+            ``model_urls`` and passes it to the model's ``load_state_dict``.
+        mode (str): ``'flow'`` builds the network with ``inp=20``; any other
+            value leaves ``inp`` to ``kwargs`` / the ``ResNet`` default.
+        **kwargs: forwarded to ``ResNet``.
+    """
+    # Flow mode pins the input channel count; otherwise nothing extra is passed.
+    flow_args = {'inp': 20} if mode == 'flow' else {}
+    model = ResNet(BasicBlock, [3, 4, 6, 3], **flow_args, **kwargs)
+
+    if not pretrained:
+        return model
+    checkpoint = model_zoo.load_url(model_urls['resnet34'])
+    model.load_state_dict(checkpoint)
+    return model
+
+
+def resnet50(pretrained=False, mode='rgb', **kwargs):
+    """Constructs a ResNet-50 model (Bottleneck, stages of 3/4/6/3 blocks).
+
+    Args:
+        pretrained (bool): If True, downloads the 'resnet50' checkpoint from
+            ``model_urls`` and passes it to the model's ``load_state_dict``.
+        mode (str): ``'flow'`` builds the network with ``inp=20``; any other
+            value leaves ``inp`` to ``kwargs`` / the ``ResNet`` default.
+        **kwargs: forwarded to ``ResNet``.
+    """
+    # Flow mode pins the input channel count; otherwise nothing extra is passed.
+    flow_args = {'inp': 20} if mode == 'flow' else {}
+    model = ResNet(Bottleneck, [3, 4, 6, 3], **flow_args, **kwargs)
+
+    if not pretrained:
+        return model
+    checkpoint = model_zoo.load_url(model_urls['resnet50'])
+    model.load_state_dict(checkpoint)
+    return model
+
+
+def resnet101(pretrained=False, mode='rgb', **kwargs):
+    """Constructs a ResNet-101 model (Bottleneck, stages of 3/4/23/3 blocks).
+
+    Args:
+        pretrained (bool): If True, downloads the 'resnet101' checkpoint from
+            ``model_urls`` and passes it to the model's ``load_state_dict``.
+        mode (str): ``'flow'`` builds the network for 20-channel input frames
+            (``inp=20``), matching ``resnet34``/``resnet50``; any other value
+            leaves ``inp`` to ``kwargs`` / the ``ResNet`` default.
+        **kwargs: forwarded to ``ResNet``.
+    """
+    if mode == 'flow':
+        model = ResNet(Bottleneck, [3, 4, 23, 3], inp=20, **kwargs)
+    else:
+        model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
+    return model
+
+
+def resnet152(pretrained=False, mode='rgb', **kwargs):
+    """Constructs a ResNet-152 model (Bottleneck, stages of 3/8/36/3 blocks).
+
+    Args:
+        pretrained (bool): If True, downloads the 'resnet152' checkpoint from
+            ``model_urls`` and passes it to the model's ``load_state_dict``.
+        mode (str): ``'flow'`` builds the network for 20-channel input frames
+            (``inp=20``), matching ``resnet34``/``resnet50``; any other value
+            leaves ``inp`` to ``kwargs`` / the ``ResNet`` default.
+        **kwargs: forwarded to ``ResNet``.
+    """
+    if mode == 'flow':
+        model = ResNet(Bottleneck, [3, 8, 36, 3], inp=20, **kwargs)
+    else:
+        model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if pretrained:
+        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
+    return model
+
+
+if __name__ == '__main__':
+    # Smoke test: run a random batch of 4 clips (32 frames, 20 channels,
+    # 112x112) through the flow-mode ResNet-50 on CPU and print the output size.
+    import torch
+
+    cpu = torch.device('cpu')
+    model = resnet50(pretrained=False, mode='flow')
+    model.to(cpu)
+
+    clip = torch.rand((4, 32, 20, 112, 112)).to(cpu)
+    scores = model(clip)
+
+    print(scores.size())