From 495ffc4777522e40941753e3b1b79c02f84b25b4 Mon Sep 17 00:00:00 2001 From: Grafting Rayman <156515434+GraftingRayman@users.noreply.github.com> Date: Fri, 17 Jan 2025 11:00:30 +0000 Subject: Add files via upload --- r_basicsr/archs/duf_arch.py | 277 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 r_basicsr/archs/duf_arch.py (limited to 'r_basicsr/archs/duf_arch.py') diff --git a/r_basicsr/archs/duf_arch.py b/r_basicsr/archs/duf_arch.py new file mode 100644 index 0000000..9b963a5 --- /dev/null +++ b/r_basicsr/archs/duf_arch.py @@ -0,0 +1,277 @@ +import numpy as np +import torch +from torch import nn as nn +from torch.nn import functional as F + +from r_basicsr.utils.registry import ARCH_REGISTRY + + +class DenseBlocksTemporalReduce(nn.Module): + """A concatenation of 3 dense blocks with reduction in temporal dimension. + + Note that the output temporal dimension is 6 fewer the input temporal dimension, since there are 3 blocks. + + Args: + num_feat (int): Number of channels in the blocks. Default: 64. + num_grow_ch (int): Growing factor of the dense blocks. Default: 32 + adapt_official_weights (bool): Whether to adapt the weights translated from the official implementation. + Set to false if you want to train from scratch. Default: False. + """ + + def __init__(self, num_feat=64, num_grow_ch=32, adapt_official_weights=False): + super(DenseBlocksTemporalReduce, self).__init__() + if adapt_official_weights: + eps = 1e-3 + momentum = 1e-3 + else: # pytorch default values + eps = 1e-05 + momentum = 0.1 + + self.temporal_reduce1 = nn.Sequential( + nn.BatchNorm3d(num_feat, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d(num_feat, num_feat, (1, 1, 1), stride=(1, 1, 1), padding=(0, 0, 0), bias=True), + nn.BatchNorm3d(num_feat, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d(num_feat, num_grow_ch, (3, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=True)) + + self.temporal_reduce2 = nn.Sequential( + nn.BatchNorm3d(num_feat + num_grow_ch, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d( + num_feat + num_grow_ch, + num_feat + num_grow_ch, (1, 1, 1), + stride=(1, 1, 1), + padding=(0, 0, 0), + bias=True), nn.BatchNorm3d(num_feat + num_grow_ch, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d(num_feat + num_grow_ch, num_grow_ch, (3, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=True)) + + self.temporal_reduce3 = nn.Sequential( + nn.BatchNorm3d(num_feat + 2 * num_grow_ch, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d( + num_feat + 2 * num_grow_ch, + num_feat + 2 * num_grow_ch, (1, 1, 1), + stride=(1, 1, 1), + padding=(0, 0, 0), + bias=True), nn.BatchNorm3d(num_feat + 2 * num_grow_ch, eps=eps, momentum=momentum), + nn.ReLU(inplace=True), + nn.Conv3d( + num_feat + 2 * num_grow_ch, num_grow_ch, (3, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=True)) + + def forward(self, x): + """ + Args: + x (Tensor): Input tensor with shape (b, num_feat, t, h, w). + + Returns: + Tensor: Output with shape (b, num_feat + num_grow_ch * 3, 1, h, w). + """ + x1 = self.temporal_reduce1(x) + x1 = torch.cat((x[:, :, 1:-1, :, :], x1), 1) + + x2 = self.temporal_reduce2(x1) + x2 = torch.cat((x1[:, :, 1:-1, :, :], x2), 1) + + x3 = self.temporal_reduce3(x2) + x3 = torch.cat((x2[:, :, 1:-1, :, :], x3), 1) + + return x3 + + +class DenseBlocks(nn.Module): + """ A concatenation of N dense blocks. + + Args: + num_feat (int): Number of channels in the blocks. Default: 64. + num_grow_ch (int): Growing factor of the dense blocks. Default: 32. + num_block (int): Number of dense blocks. The values are: + DUF-S (16 layers): 3 + DUF-M (18 layers): 9 + DUF-L (52 layers): 21 + adapt_official_weights (bool): Whether to adapt the weights translated from the official implementation. + Set to false if you want to train from scratch. Default: False. + """ + + def __init__(self, num_block, num_feat=64, num_grow_ch=16, adapt_official_weights=False): + super(DenseBlocks, self).__init__() + if adapt_official_weights: + eps = 1e-3 + momentum = 1e-3 + else: # pytorch default values + eps = 1e-05 + momentum = 0.1 + + self.dense_blocks = nn.ModuleList() + for i in range(0, num_block): + self.dense_blocks.append( + nn.Sequential( + nn.BatchNorm3d(num_feat + i * num_grow_ch, eps=eps, momentum=momentum), nn.ReLU(inplace=True), + nn.Conv3d( + num_feat + i * num_grow_ch, + num_feat + i * num_grow_ch, (1, 1, 1), + stride=(1, 1, 1), + padding=(0, 0, 0), + bias=True), nn.BatchNorm3d(num_feat + i * num_grow_ch, eps=eps, momentum=momentum), + nn.ReLU(inplace=True), + nn.Conv3d( + num_feat + i * num_grow_ch, + num_grow_ch, (3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1), + bias=True))) + + def forward(self, x): + """ + Args: + x (Tensor): Input tensor with shape (b, num_feat, t, h, w). + + Returns: + Tensor: Output with shape (b, num_feat + num_block * num_grow_ch, t, h, w). + """ + for i in range(0, len(self.dense_blocks)): + y = self.dense_blocks[i](x) + x = torch.cat((x, y), 1) + return x + + +class DynamicUpsamplingFilter(nn.Module): + """Dynamic upsampling filter used in DUF. + + Ref: https://github.com/yhjo09/VSR-DUF. + It only supports input with 3 channels. And it applies the same filters to 3 channels. + + Args: + filter_size (tuple): Filter size of generated filters. The shape is (kh, kw). Default: (5, 5). + """ + + def __init__(self, filter_size=(5, 5)): + super(DynamicUpsamplingFilter, self).__init__() + if not isinstance(filter_size, tuple): + raise TypeError(f'The type of filter_size must be tuple, but got type{filter_size}') + if len(filter_size) != 2: + raise ValueError(f'The length of filter size must be 2, but got {len(filter_size)}.') + # generate a local expansion filter, similar to im2col + self.filter_size = filter_size + filter_prod = np.prod(filter_size) + expansion_filter = torch.eye(int(filter_prod)).view(filter_prod, 1, *filter_size) # (kh*kw, 1, kh, kw) + self.expansion_filter = expansion_filter.repeat(3, 1, 1, 1) # repeat for all the 3 channels + + def forward(self, x, filters): + """Forward function for DynamicUpsamplingFilter. + + Args: + x (Tensor): Input image with 3 channels. The shape is (n, 3, h, w). + filters (Tensor): Generated dynamic filters. + The shape is (n, filter_prod, upsampling_square, h, w). + filter_prod: prod of filter kernel size, e.g., 1*5*5=25. + upsampling_square: similar to pixel shuffle, + upsampling_square = upsampling * upsampling + e.g., for x 4 upsampling, upsampling_square= 4*4 = 16 + + Returns: + Tensor: Filtered image with shape (n, 3*upsampling_square, h, w) + """ + n, filter_prod, upsampling_square, h, w = filters.size() + kh, kw = self.filter_size + expanded_input = F.conv2d( + x, self.expansion_filter.to(x), padding=(kh // 2, kw // 2), groups=3) # (n, 3*filter_prod, h, w) + expanded_input = expanded_input.view(n, 3, filter_prod, h, w).permute(0, 3, 4, 1, + 2) # (n, h, w, 3, filter_prod) + filters = filters.permute(0, 3, 4, 1, 2) # (n, h, w, filter_prod, upsampling_square] + out = torch.matmul(expanded_input, filters) # (n, h, w, 3, upsampling_square) + return out.permute(0, 3, 4, 1, 2).view(n, 3 * upsampling_square, h, w) + + +@ARCH_REGISTRY.register() +class DUF(nn.Module): + """Network architecture for DUF + + Paper: Jo et.al. Deep Video Super-Resolution Network Using Dynamic + Upsampling Filters Without Explicit Motion Compensation, CVPR, 2018 + Code reference: + https://github.com/yhjo09/VSR-DUF + For all the models below, 'adapt_official_weights' is only necessary when + loading the weights converted from the official TensorFlow weights. + Please set it to False if you are training the model from scratch. + + There are three models with different model size: DUF16Layers, DUF28Layers, + and DUF52Layers. This class is the base class for these models. + + Args: + scale (int): The upsampling factor. Default: 4. + num_layer (int): The number of layers. Default: 52. + adapt_official_weights_weights (bool): Whether to adapt the weights + translated from the official implementation. Set to false if you + want to train from scratch. Default: False. + """ + + def __init__(self, scale=4, num_layer=52, adapt_official_weights=False): + super(DUF, self).__init__() + self.scale = scale + if adapt_official_weights: + eps = 1e-3 + momentum = 1e-3 + else: # pytorch default values + eps = 1e-05 + momentum = 0.1 + + self.conv3d1 = nn.Conv3d(3, 64, (1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=True) + self.dynamic_filter = DynamicUpsamplingFilter((5, 5)) + + if num_layer == 16: + num_block = 3 + num_grow_ch = 32 + elif num_layer == 28: + num_block = 9 + num_grow_ch = 16 + elif num_layer == 52: + num_block = 21 + num_grow_ch = 16 + else: + raise ValueError(f'Only supported (16, 28, 52) layers, but got {num_layer}.') + + self.dense_block1 = DenseBlocks( + num_block=num_block, num_feat=64, num_grow_ch=num_grow_ch, + adapt_official_weights=adapt_official_weights) # T = 7 + self.dense_block2 = DenseBlocksTemporalReduce( + 64 + num_grow_ch * num_block, num_grow_ch, adapt_official_weights=adapt_official_weights) # T = 1 + channels = 64 + num_grow_ch * num_block + num_grow_ch * 3 + self.bn3d2 = nn.BatchNorm3d(channels, eps=eps, momentum=momentum) + self.conv3d2 = nn.Conv3d(channels, 256, (1, 3, 3), stride=(1, 1, 1), padding=(0, 1, 1), bias=True) + + self.conv3d_r1 = nn.Conv3d(256, 256, (1, 1, 1), stride=(1, 1, 1), padding=(0, 0, 0), bias=True) + self.conv3d_r2 = nn.Conv3d(256, 3 * (scale**2), (1, 1, 1), stride=(1, 1, 1), padding=(0, 0, 0), bias=True) + + self.conv3d_f1 = nn.Conv3d(256, 512, (1, 1, 1), stride=(1, 1, 1), padding=(0, 0, 0), bias=True) + self.conv3d_f2 = nn.Conv3d( + 512, 1 * 5 * 5 * (scale**2), (1, 1, 1), stride=(1, 1, 1), padding=(0, 0, 0), bias=True) + + def forward(self, x): + """ + Args: + x (Tensor): Input with shape (b, 7, c, h, w) + + Returns: + Tensor: Output with shape (b, c, h * scale, w * scale) + """ + num_batches, num_imgs, _, h, w = x.size() + + x = x.permute(0, 2, 1, 3, 4) # (b, c, 7, h, w) for Conv3D + x_center = x[:, :, num_imgs // 2, :, :] + + x = self.conv3d1(x) + x = self.dense_block1(x) + x = self.dense_block2(x) + x = F.relu(self.bn3d2(x), inplace=True) + x = F.relu(self.conv3d2(x), inplace=True) + + # residual image + res = self.conv3d_r2(F.relu(self.conv3d_r1(x), inplace=True)) + + # filter + filter_ = self.conv3d_f2(F.relu(self.conv3d_f1(x), inplace=True)) + filter_ = F.softmax(filter_.view(num_batches, 25, self.scale**2, h, w), dim=1) + + # dynamic filter + out = self.dynamic_filter(x_center, filter_) + out += res.squeeze_(2) + out = F.pixel_shuffle(out, self.scale) + + return out -- cgit v1.2.3