【PyTorch】print(model)とtorchinfo.summary(model)で層の表示に違いがあるのはなぜか？

実現したいこと

あるコードで定義されているFaceEncoderモデルの構造を可視化する為、２つの方法を試しました。
① torchinfo.summary(model)
② print(model)
表示形式こそ異なりますが、途中までは①と②の層が対応しています。ところが最後の方を見ると、①にはある「GatedConv2dとAdaptiveAvgPool2dの間のResNet」と「最後のConv2d」が、②には見当たりません。
こちらの理由を知りたいです！
よろしくお願いします。

① torchinfo.summary(model)

...
├─GatedConv2d: 1-7                            [10, 1024, 16, 16]        --
│    └─Sequential: 2-24                       [10, 1024, 16, 16]        --
│    │    └─Conv2d: 3-36                      [10, 512, 16, 16]         4,719,104
│    │    └─BatchNorm2d: 3-37                 [10, 512, 16, 16]         1,024
│    │    └─ReLU: 3-38                        [10, 512, 16, 16]         --
│    │    └─Conv2d: 3-39                      [10, 1024, 16, 16]        4,719,616
│    │    └─Sigmoid: 3-40                     [10, 1024, 16, 16]        --
├─ResNet: 1-8                                 --                        (recursive)
│    └─Sequential: 2-25                       [10, 2048, 8, 8]          --
│    │    └─Bottleneck: 3-41                  [10, 2048, 8, 8]          6,039,552
│    │    └─Bottleneck: 3-42                  [10, 2048, 8, 8]          4,462,592
│    │    └─Bottleneck: 3-43                  [10, 2048, 8, 8]          4,462,592
├─AdaptiveAvgPool2d: 1-9                      [10, 2048, 1, 1]          --
├─Conv2d: 1-10                                [10, 257, 1, 1]           526,593

② print(model)

  ...
  (gated): GatedConv2d(
    (ConvMask): Sequential(
      (0): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): Sigmoid()
    )
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
)

該当のソースコード

python network.pyで①、②を可視化。

import torch
from torch import nn
# from models.resnet50 import resnet50
from resnet50 import resnet50
import torch.nn.functional as F
from torchinfo import summary


class ConvBNReLU(nn.Module):
    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and a ReLU.

    Args:
        in_chan: Number of input channels of the convolution.
        out_chan: Number of output channels (also the BatchNorm width).
        ks: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        *args, **kwargs: Accepted for call-site compatibility and ignored.
    """

    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
        super().__init__()
        conv_options = dict(kernel_size=ks, stride=stride, padding=padding, bias=False)
        self.conv = nn.Conv2d(in_chan, out_chan, **conv_options)
        self.bn = nn.BatchNorm2d(out_chan)
        self.init_weight()

    def init_weight(self):
        """Re-initialise every direct ``Conv2d`` child (Kaiming normal, a=1; bias -> 0)."""
        direct_convs = (m for m in self.children() if isinstance(m, nn.Conv2d))
        for conv in direct_convs:
            nn.init.kaiming_normal_(conv.weight, a=1)
            if conv.bias is not None:
                nn.init.constant_(conv.bias, 0)

    def forward(self, x):
        """Return ``relu(bn(conv(x)))``."""
        return F.relu(self.bn(self.conv(x)))


class AttentionRefinementModule(nn.Module):
    """Re-weight a feature map with a per-channel attention vector.

    The input goes through a 3x3 ``ConvBNReLU``; the result is averaged over
    its full spatial extent, passed through a 1x1 conv, BatchNorm and a
    sigmoid, and the resulting gate is multiplied back onto the features.

    Args:
        in_chan: Channels of the incoming feature map.
        out_chan: Channels of the refined output.
    """

    def __init__(self, in_chan, out_chan):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
        self.bn_atten = nn.BatchNorm2d(out_chan)
        self.sigmoid_atten = nn.Sigmoid()
        self.init_weight()

    def init_weight(self):
        """Kaiming-normal (a=1) init for direct ``Conv2d`` children; zero any bias."""
        for child in self.children():
            if not isinstance(child, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(child.weight, a=1)
            if child.bias is not None:
                nn.init.constant_(child.bias, 0)

    def forward(self, x):
        """Return the conv features scaled by their channel attention."""
        feat = self.conv(x)
        # Pooling with a kernel equal to the spatial size leaves a 1x1 map per channel.
        gate = F.avg_pool2d(feat, feat.shape[2:])
        gate = self.sigmoid_atten(self.bn_atten(self.conv_atten(gate)))
        return feat * gate


class FeatureFusionModule(nn.Module):
    """Fuse two feature maps by concatenation plus a channel-attention residual.

    Args:
        in_chan: Total channels after concatenating both inputs along dim 1.
        out_chan: Channels of the fused output; the attention bottleneck uses
            ``out_chan // 4`` channels.
    """

    def __init__(self, in_chan, out_chan):
        super().__init__()
        squeezed = out_chan // 4
        pointwise = dict(kernel_size=1, stride=1, padding=0, bias=False)
        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
        self.conv1 = nn.Conv2d(out_chan, squeezed, **pointwise)
        self.conv2 = nn.Conv2d(squeezed, out_chan, **pointwise)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def init_weight(self):
        """Kaiming-normal (a=1) init for direct ``Conv2d`` children; zero any bias."""
        for child in self.children():
            if not isinstance(child, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(child.weight, a=1)
            if child.bias is not None:
                nn.init.constant_(child.bias, 0)

    def forward(self, fsp, fcp):
        """Concatenate ``fsp`` and ``fcp`` on the channel axis and fuse them.

        Returns ``feat * atten + feat`` where ``feat`` is the 1x1-conv fusion
        of the concatenated inputs and ``atten`` is its sigmoid channel gate.
        """
        fused = self.convblk(torch.cat([fsp, fcp], dim=1))
        # Full-extent average pooling -> one value per channel.
        gate = F.avg_pool2d(fused, fused.shape[2:])
        gate = self.relu(self.conv1(gate))
        gate = self.sigmoid(self.conv2(gate))
        return fused * gate + fused


class ContextPath(nn.Module):
    """Top-down context branch operating on the three backbone feature maps.

    ``forward`` takes the three maps the caller passes as ``feat4``, ``feat8``
    and ``feat16`` (expected channel counts 512 for ``feat8`` and 1024 for
    ``feat16``, per the layer widths below) and returns
    ``(feat4, feat8_up, feat16_up)``.
    """

    def __init__(self):
        super().__init__()
        self.arm8 = AttentionRefinementModule(512, 256)
        self.arm16 = AttentionRefinementModule(1024, 256)
        self.conv_head8 = ConvBNReLU(256, 256, 3, 1, 1)
        self.conv_head16 = ConvBNReLU(256, 256, 3, 1, 1)
        self.conv_avg = ConvBNReLU(1024, 256, 1, 1, 0)
        self.init_weight()

    def init_weight(self):
        """Kaiming-normal (a=1) init for direct ``Conv2d`` children; zero any bias.

        None of the direct children registered in ``__init__`` is an
        ``nn.Conv2d`` instance, so this loop currently matches nothing; the
        submodules run their own ``init_weight`` when constructed.
        """
        for child in self.children():
            if not isinstance(child, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(child.weight, a=1)
            if child.bias is not None:
                nn.init.constant_(child.bias, 0)

    def forward(self, feat4, feat8, feat16):
        """Refine ``feat16`` and ``feat8`` and upsample them step by step.

        Returns:
            ``(feat4, feat8_up, feat16_up)``: ``feat4`` is passed through
            unchanged, ``feat8_up`` has the spatial size of ``feat4`` and
            ``feat16_up`` has the spatial size of ``feat8``.
        """
        size4 = tuple(feat4.shape[2:])
        size8 = tuple(feat8.shape[2:])
        size16 = tuple(feat16.shape[2:])
        # Mirror the original two-value unpacking: inputs must have exactly two spatial dims.
        (_, _), (_, _), (_, _) = size4, size8, size16

        # Global context: pool to 1x1, project, then broadcast back to feat16's size.
        global_ctx = self.conv_avg(F.avg_pool2d(feat16, feat16.size()[2:]))
        global_ctx_up = F.interpolate(global_ctx, size16, mode='nearest')

        # 1/16 branch: attention-refine, add global context, upsample to feat8's size.
        refined16 = self.arm16(feat16) + global_ctx_up
        feat16_up = self.conv_head16(F.interpolate(refined16, size8, mode='nearest'))

        # 1/8 branch: attention-refine, add the upsampled 1/16 branch, upsample to feat4's size.
        refined8 = self.arm8(feat8) + feat16_up
        feat8_up = self.conv_head8(F.interpolate(refined8, size4, mode='nearest'))

        return feat4, feat8_up, feat16_up


class BiSeNetOutPut(nn.Module):
    """Output head: a 3x3 ``ConvBNReLU`` followed by a bias-free 1x1 conv.

    Args:
        in_chan: Channels of the incoming feature map.
        mid_chan: Channels between the two convolutions.
        n_classes: Channels of the produced map.
    """

    def __init__(self, in_chan, mid_chan, n_classes):
        super().__init__()
        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
        self.init_weight()

    def init_weight(self):
        """Kaiming-normal (a=1) init for direct ``Conv2d`` children; zero any bias."""
        for child in self.children():
            if not isinstance(child, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(child.weight, a=1)
            if child.bias is not None:
                nn.init.constant_(child.bias, 0)

    def forward(self, x):
        """Return ``conv_out(conv(x))``."""
        return self.conv_out(self.conv(x))


class GatedConv2d(nn.Module):
    """Multiply a feature map by a learned sigmoid mask of the same shape.

    The mask is produced by ``ConvMask``: Conv3x3 -> BatchNorm -> ReLU ->
    Conv3x3 -> Sigmoid, going ``in_chan -> mid_chan -> in_chan``.

    Args:
        in_chan: Channels of the input (and of the output).
        mid_chan: Channels of the hidden layer inside the mask branch.
    """

    def __init__(self, in_chan, mid_chan):
        super().__init__()
        self.ConvMask = nn.Sequential(
            nn.Conv2d(in_chan, mid_chan, 3, 1, 1),
            nn.BatchNorm2d(mid_chan),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_chan, in_chan, 3, 1, 1),
            nn.Sigmoid()
        )
        self.init_weight()

    def init_weight(self):
        """Kaiming-normal (a=1) init for every ``Conv2d`` in the module; zero biases.

        The convolutions live inside the ``ConvMask`` Sequential, so they are
        not direct children. Iterating ``self.children()`` would only yield
        the Sequential itself and never match ``nn.Conv2d``; ``self.modules()``
        walks the nested layers as well.
        """
        for ly in self.modules():
            if isinstance(ly, nn.Conv2d):
                nn.init.kaiming_normal_(ly.weight, a=1)
                if ly.bias is not None:
                    nn.init.constant_(ly.bias, 0)

    def forward(self, feat):
        """Return ``feat * ConvMask(feat)``."""
        mask = self.ConvMask(feat)
        out = feat * mask
        return out


class FaceEncoder(nn.Module):
    """ResNet-50 based encoder producing a 257-d coefficient vector and three maps.

    Submodules are registered in the order written in ``__init__``; this is
    the order ``print(model)`` lists them in. ``forward`` uses them in a
    different order: ``coeff`` is registered second but applied last, and the
    backbone's ``layer4`` is invoked directly on ``self.feat`` after the
    gating step rather than through ``self.feat(img)``.
    """

    def __init__(self):
        super().__init__()
        # Pretrained ResNet-50 used as the sub-module that extracts the feature maps.
        self.feat = resnet50(pretrained=True)
        self.coeff = nn.Conv2d(2048, 257, kernel_size=(1, 1))
        self.cp = ContextPath()
        self.ffm = FeatureFusionModule(512, 256)
        self.conv_out = BiSeNetOutPut(256, 256, n_classes=1)
        self.conv_out8 = BiSeNetOutPut(256, 128, n_classes=1)
        self.conv_out16 = BiSeNetOutPut(256, 128, n_classes=1)
        self.gated = GatedConv2d(1024, 512)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.weight_init()

    @torch.no_grad()
    def weight_init(self):
        """Zero the weight and bias of the final ``coeff`` convolution."""
        for tensor in (self.coeff.weight, self.coeff.bias):
            tensor.zero_()

    def forward(self, img):
        """Encode ``img`` into coefficients and three full-resolution maps.

        Shape comments below are for an input of shape [10, 3, 256, 256].

        Returns:
            ``(coeff, feat_out, feat_out8, feat_out16)`` where ``coeff`` has
            its two trailing singleton dims squeezed away and the three maps
            are bilinearly resized to the spatial size of ``img``.
        """
        height, width = img.shape[2:]
        # Extract feature maps from the input image at 1/4, 1/8 and 1/16 of its size:
        # [10, 256, 64, 64], [10, 512, 32, 32], [10, 1024, 16, 16]
        feat4, feat8, feat16 = self.feat(img)
        # [10, 256, 64, 64], [10, 256, 64, 64], [10, 256, 32, 32]
        feat_sp, feat_cp4, feat_cp8 = self.cp(feat4, feat8, feat16)

        fused = self.ffm(feat_sp, feat_cp4)
        head_outputs = (
            self.conv_out(fused),
            self.conv_out8(feat_cp4),
            self.conv_out16(feat_cp8),
        )
        feat_out, feat_out8, feat_out16 = [
            F.interpolate(head, (height, width), mode='bilinear', align_corners=True)
            for head in head_outputs
        ]

        # Coefficient branch: gate the 1/16 map, then run the backbone's layer4 on it.
        gated16 = self.gated(feat16)
        deep = self.feat.layer4(gated16)   # [10, 2048, 8, 8]
        pooled = self.avgpool(deep)        # [10, 2048, 1, 1]
        coeff = self.coeff(pooled)         # [10, 257, 1, 1]
        return coeff.squeeze(2).squeeze(2), feat_out, feat_out8, feat_out16


if __name__ == '__main__':
    # Build the encoder once and show the same instance both ways, so the two
    # views describe one model and the pretrained backbone is constructed
    # only once.
    model = FaceEncoder()
    # Lists submodules in the order they were registered in __init__.
    print(model)
    # Runs a forward pass on a [10, 3, 256, 256] input and reports the layers it sees.
    summary(model=model, input_size=(10, 3, 256, 256))

quickquip

2023/07/09 02:05

結果を切り取らないで全部載せたらいかがですか?

tetsutail_study

2023/07/09 13:09

@quickquipさんご指摘ありがとうございます。以下に結果の全体像を載せます。

tetsutail_study

2023/07/09 13:09

↓ torchinfo.summary
===============================================================================================
Layer (type:depth-idx)                        Output Shape              Param #
===============================================================================================
FaceEncoder                                   [10, 257]                 --
├─ResNet: 1-1                                 [10, 256, 64, 64]         14,964,736
│    └─Conv2d: 2-1                            [10, 64, 128, 128]        9,408
│    └─BatchNorm2d: 2-2                       [10, 64, 128, 128]        128
│    └─ReLU: 2-3                              [10, 64, 128, 128]        --
│    └─MaxPool2d: 2-4                         [10, 64, 64, 64]          --
│    └─Sequential: 2-5                        [10, 256, 64, 64]         --
│    │    └─Bottleneck: 3-1                   [10, 256, 64, 64]         75,008
│    │    └─Bottleneck: 3-2                   [10, 256, 64, 64]         70,400
│    │    └─Bottleneck: 3-3                   [10, 256, 64, 64]         70,400
│    └─Sequential: 2-6                        [10, 512, 32, 32]         --
│    │    └─Bottleneck: 3-4                   [10, 512, 32, 32]         379,392
│    │    └─Bottleneck: 3-5                   [10, 512, 32, 32]         280,064
│    │    └─Bottleneck: 3-6                   [10, 512, 32, 32]         280,064
│    │    └─Bottleneck: 3-7                   [10, 512, 32, 32]         280,064
│    └─Sequential: 2-7                        [10, 1024, 16, 16]        --
│    │    └─Bottleneck: 3-8                   [10, 1024, 16, 16]        1,512,448
│    │    └─Bottleneck: 3-9                   [10, 1024, 16, 16]        1,117,184
│    │    └─Bottleneck: 3-10                  [10, 1024, 16, 16]        1,117,184
│    │    └─Bottleneck: 3-11                  [10, 1024, 16, 16]        1,117,184
│    │    └─Bottleneck: 3-12                  [10, 1024, 16, 16]        1,117,184
│    │    └─Bottleneck: 3-13                  [10, 1024, 16, 16]        1,117,184
├─ContextPath: 1-2                            [10, 256, 64, 64]         --
│    └─ConvBNReLU: 2-8                        [10, 256, 1, 1]           --
│    │    └─Conv2d: 3-14                      [10, 256, 1, 1]           262,144
│    │    └─BatchNorm2d: 3-15                 [10, 256, 1, 1]           512
│    └─AttentionRefinementModule: 2-9         [10, 256, 16, 16]         --
│    │    └─ConvBNReLU: 3-16                  [10, 256, 16, 16]         2,359,808
│    │    └─Conv2d: 3-17                      [10, 256, 1, 1]           65,536
│    │    └─BatchNorm2d: 3-18                 [10, 256, 1, 1]           512
│    │    └─Sigmoid: 3-19                     [10, 256, 1, 1]           --
│    └─ConvBNReLU: 2-10                       [10, 256, 32, 32]         --
│    │    └─Conv2d: 3-20                      [10, 256, 32, 32]         589,824
│    │    └─BatchNorm2d: 3-21                 [10, 256, 32, 32]         512
│    └─AttentionRefinementModule: 2-11        [10, 256, 32, 32]         --
│    │    └─ConvBNReLU: 3-22                  [10, 256, 32, 32]         1,180,160
│    │    └─Conv2d: 3-23                      [10, 256, 1, 1]           65,536
│    │    └─BatchNorm2d: 3-24                 [10, 256, 1, 1]           512
│    │    └─Sigmoid: 3-25                     [10, 256, 1, 1]           --
│    └─ConvBNReLU: 2-12                       [10, 256, 64, 64]         --
│    │    └─Conv2d: 3-26                      [10, 256, 64, 64]         589,824
│    │    └─BatchNorm2d: 3-27                 [10, 256, 64, 64]         512
├─FeatureFusionModule: 1-3                    [10, 256, 64, 64]         --
│    └─ConvBNReLU: 2-13                       [10, 256, 64, 64]         --
│    │    └─Conv2d: 3-28                      [10, 256, 64, 64]         131,072
│    │    └─BatchNorm2d: 3-29                 [10, 256, 64, 64]         512
│    └─Conv2d: 2-14                           [10, 64, 1, 1]            16,384
│    └─ReLU: 2-15                             [10, 64, 1, 1]            --
│    └─Conv2d: 2-16                           [10, 256, 1, 1]           16,384
│    └─Sigmoid: 2-17                          [10, 256, 1, 1]           --
├─BiSeNetOutPut: 1-4                          [10, 1, 64, 64]           --
│    └─ConvBNReLU: 2-18                       [10, 256, 64, 64]         --
│    │    └─Conv2d: 3-30                      [10, 256, 64, 64]         589,824
│    │    └─BatchNorm2d: 3-31                 [10, 256, 64, 64]         512
│    └─Conv2d: 2-19                           [10, 1, 64, 64]           256
├─BiSeNetOutPut: 1-5                          [10, 1, 64, 64]           --
│    └─ConvBNReLU: 2-20                       [10, 128, 64, 64]         --
│    │    └─Conv2d: 3-32                      [10, 128, 64, 64]         294,912
│    │    └─BatchNorm2d: 3-33                 [10, 128, 64, 64]         256
│    └─Conv2d: 2-21                           [10, 1, 64, 64]           128
├─BiSeNetOutPut: 1-6                          [10, 1, 32, 32]           --
│    └─ConvBNReLU: 2-22                       [10, 128, 32, 32]         --
│    │    └─Conv2d: 3-34                      [10, 128, 32, 32]         294,912
│    │    └─BatchNorm2d: 3-35                 [10, 128, 32, 32]         256
│    └─Conv2d: 2-23                           [10, 1, 32, 32]           128
├─GatedConv2d: 1-7                            [10, 1024, 16, 16]        --
│    └─Sequential: 2-24                       [10, 1024, 16, 16]        --
│    │    └─Conv2d: 3-36                      [10, 512, 16, 16]         4,719,104
│    │    └─BatchNorm2d: 3-37                 [10, 512, 16, 16]         1,024
│    │    └─ReLU: 3-38                        [10, 512, 16, 16]         --
│    │    └─Conv2d: 3-39                      [10, 1024, 16, 16]        4,719,616
│    │    └─Sigmoid: 3-40                     [10, 1024, 16, 16]        --
├─ResNet: 1-8                                 --                        (recursive)
│    └─Sequential: 2-25                       [10, 2048, 8, 8]          --
│    │    └─Bottleneck: 3-41                  [10, 2048, 8, 8]          6,039,552
│    │    └─Bottleneck: 3-42                  [10, 2048, 8, 8]          4,462,592
│    │    └─Bottleneck: 3-43                  [10, 2048, 8, 8]          4,462,592
├─AdaptiveAvgPool2d: 1-9                      [10, 2048, 1, 1]          --
├─Conv2d: 1-10                                [10, 257, 1, 1]           526,593
===============================================================================================
Total params: 54,900,033
Trainable params: 54,900,033
Non-trainable params: 0
Total mult-adds (G): 170.52
===============================================================================================
Input size (MB): 7.86
Forward/backward pass size (MB): 3067.99
Params size (MB): 159.74
Estimated Total Size (MB): 3235.60
===============================================================================================

tetsutail_study

2023/07/09 13:10

↓ print(model) FaceEncoder( (feat): ResNet( (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False) (layer1): Sequential( (0): Bottleneck( (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (2): Bottleneck( (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(64, 256, 
kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) ) (layer2): Sequential( (0): Bottleneck( (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (2): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): 
ReLU(inplace=True) ) (3): Bottleneck( (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) ) (layer3): Sequential( (0): Bottleneck( (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (2): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (3): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (4): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) (5): Bottleneck( (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True) (relu): ReLU(inplace=True) ) ) (layer4): Sequential( (0): Bottleneck( (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False) (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False) (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) (relu): ReLU(inplace=True) (downsample): Sequential( (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False) (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) ) ) (1): Bottleneck( (conv1): Conv2d(2048, 512, kernel_si

tetsutail_study

2023/07/09 13:12

よろしくお願いいたします。

quickquip

2023/07/09 23:32

質問は編集できます。情報はこの欄ではなくて質問に載せましょう。