odak.learn.models

Provides necessary definitions for components used in machine learning and deep learning.

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """
    def __init__(
                 self, 
                 gate_channels, 
                 reduction_ratio = 16, 
                 pool_types = ['avg', 'max']
                ):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply.
        """
        super().__init__()
        self.gate_channels = gate_channels
        hidden_channels = gate_channels // reduction_ratio
        if hidden_channels == 0:
            hidden_channels = 1
        self.mlp = torch.nn.Sequential(
                                       convolutional_block_attention.Flatten(),
                                       torch.nn.Linear(gate_channels, hidden_channels),
                                       torch.nn.ReLU(),
                                       torch.nn.Linear(hidden_channels, gate_channels)
                                      )
        self.pool_types = pool_types


    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.
        """
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pool = torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
            elif pool_type == 'max':
                pool = torch.nn.functional.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = channel_att_raw if channel_att_sum is None else channel_att_sum + channel_att_raw
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels (int) –
              Number of channels of the input feature map.

  • reduction_ratio (int, default: 16) –
              Reduction ratio for the intermediate layer.

  • pool_types (list, default: ['avg', 'max']) –
              List of pooling operations to apply.
Source code in odak/learn/models/components.py
def __init__(
             self, 
             gate_channels, 
             reduction_ratio = 16, 
             pool_types = ['avg', 'max']
            ):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
    """
    super().__init__()
    self.gate_channels = gate_channels
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        hidden_channels = 1
    self.mlp = torch.nn.Sequential(
                                   convolutional_block_attention.Flatten(),
                                   torch.nn.Linear(gate_channels, hidden_channels),
                                   torch.nn.ReLU(),
                                   torch.nn.Linear(hidden_channels, gate_channels)
                                  )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x (torch.tensor) –
           Input tensor to the ChannelGate module.

Returns:

  • output (torch.tensor) –
           Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == 'avg':
            pool = torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        elif pool_type == 'max':
            pool = torch.nn.functional.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        channel_att_raw = self.mlp(pool)
        channel_att_sum = channel_att_raw if channel_att_sum is None else channel_att_sum + channel_att_raw
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output
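
Example: a minimal usage sketch, assuming channel_gate is importable from odak.learn.models as this page's title suggests; the shapes below are illustrative.

import torch
from odak.learn.models import channel_gate

gate = channel_gate(gate_channels = 32, reduction_ratio = 16, pool_types = ['avg', 'max'])
x = torch.randn(4, 32, 64, 64)  # a batch of 32-channel feature maps
y = gate(x)                     # channel-wise reweighted features
assert y.shape == x.shape       # the attention mask preserves the input shape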

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 bias = False,
                 stride = 1,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        layers = [
            torch.nn.Conv2d(
                            input_channels,
                            output_channels,
                            kernel_size = kernel_size,
                            stride = stride,
                            padding = kernel_size // 2,
                            bias = bias
                           )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels (int, default: 2) –
              Number of input channels.

  • output_channels (int, default: 2) –
              Number of output channels.

  • kernel_size (int, default: 3) –
              Kernel size.

  • bias (bool, default: False) –
              Set to True to let convolutional layers have a bias term.

  • stride (int, default: 1) –
              Stride of the convolution.

  • normalization (bool, default: True) –
              If True, adds a Batch Normalization layer after the convolutional layer.

  • activation (torch.nn, default: torch.nn.ReLU()) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             bias = False,
             stride = 1,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    layers = [
        torch.nn.Conv2d(
                        input_channels,
                        output_channels,
                        kernel_size = kernel_size,
                        stride = stride,
                        padding = kernel_size // 2,
                        bias = bias
                       )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x (torch.tensor) –
            Input data.

Returns:

  • result (torch.tensor) –
            Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    result = self.model(x)
    return result
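
Example: a minimal usage sketch (illustrative shapes; assumes convolution_layer is importable from odak.learn.models).

import torch
from odak.learn.models import convolution_layer

layer = convolution_layer(input_channels = 3, output_channels = 8, kernel_size = 3)
x = torch.randn(1, 3, 32, 32)
y = layer(x)    # padding = kernel_size // 2 keeps the spatial size at stride 1
print(y.shape)  # torch.Size([1, 8, 32, 32])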

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class. 
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """
    def __init__(
                 self, 
                 gate_channels, 
                 reduction_ratio = 16, 
                 pool_types = ['avg', 'max'], 
                 no_spatial = False
                ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super(convolutional_block_attention, self).__init__()
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()


    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """
        def forward(self, x):
            return x.view(x.size(0), -1)


    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens the input tensor to a 2D matrix.
    """
    def forward(self, x):
        return x.view(x.size(0), -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels (int) –
              Number of channels of the input feature map.

  • reduction_ratio (int, default: 16) –
              Reduction ratio for the channel attention.

  • pool_types (list, default: ['avg', 'max']) –
              List of pooling operations to apply for channel attention.

  • no_spatial (bool, default: False) –
              If True, spatial attention is not applied.
Source code in odak/learn/models/components.py
def __init__(
             self, 
             gate_channels, 
             reduction_ratio = 16, 
             pool_types = ['avg', 'max'], 
             no_spatial = False
            ):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x (torch.tensor) –
           Input tensor to the CBAM module.

Returns:

  • x_out (torch.tensor) –
           Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if not self.no_spatial:
        x_out = self.spatial_gate(x_out)
    return x_out
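
Example: a minimal usage sketch (assumes convolutional_block_attention is importable from odak.learn.models; the spatial_gate it uses internally is defined in the same components file).

import torch
from odak.learn.models import convolutional_block_attention

cbam = convolutional_block_attention(gate_channels = 16)
x = torch.randn(2, 16, 28, 28)
y = cbam(x)                # channel attention followed by spatial attention
assert y.shape == x.shape  # output shape matches the input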

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = None,
                 output_channels = 2,
                 kernel_size = 3, 
                 bias = False,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        if isinstance(mid_channels, type(None)):
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
                                         convolution_layer(
                                                           input_channels = input_channels,
                                                           output_channels = mid_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          ),
                                         convolution_layer(
                                                           input_channels = mid_channels,
                                                           output_channels = output_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          )
                                        )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels (int, default: 2) –
              Number of input channels.

  • mid_channels (int, default: None) –
              Number of channels in the hidden layer between the two convolutions. If None, set to output_channels.

  • output_channels (int, default: 2) –
              Number of output channels.

  • kernel_size (int, default: 3) –
              Kernel size.

  • bias (bool, default: False) –
              Set to True to let convolutional layers have a bias term.

  • normalization (bool, default: True) –
              If True, adds a Batch Normalization layer after the convolutional layer.

  • activation (torch.nn, default: torch.nn.ReLU()) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = None,
             output_channels = 2,
             kernel_size = 3, 
             bias = False,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    if isinstance(mid_channels, type(None)):
        mid_channels = output_channels
    self.activation = activation
    self.model = torch.nn.Sequential(
                                     convolution_layer(
                                                       input_channels = input_channels,
                                                       output_channels = mid_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      ),
                                     convolution_layer(
                                                       input_channels = mid_channels,
                                                       output_channels = output_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      )
                                    )

forward(x)

Forward model.

Parameters:

  • x (torch.tensor) –
            Input data.

Returns:

  • result (torch.tensor) –
            Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.model(x)
    return result
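
Example: a minimal usage sketch (illustrative shapes).

import torch
from odak.learn.models import double_convolution

block = double_convolution(input_channels = 3, mid_channels = 16, output_channels = 16)
x = torch.randn(1, 3, 64, 64)
y = block(x)    # two stride-1 convolutions; the spatial size is preserved
print(y.shape)  # torch.Size([1, 16, 64, 64])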

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.maxpool_conv = torch.nn.Sequential(
                                                torch.nn.MaxPool2d(2),
                                                double_convolution(
                                                                   input_channels = input_channels,
                                                                   mid_channels = output_channels,
                                                                   output_channels = output_channels,
                                                                   kernel_size = kernel_size,
                                                                   bias = bias,
                                                                   activation = activation
                                                                  )
                                               )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         Input data.



        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.maxpool_conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels (int) –
              Number of input channels.

  • output_channels (int) –
              Number of output channels.

  • kernel_size (int, default: 3) –
              Kernel size.

  • bias (bool, default: False) –
              Set to True to let convolutional layers have a bias term.

  • activation (torch.nn, default: torch.nn.ReLU()) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.maxpool_conv = torch.nn.Sequential(
                                            torch.nn.MaxPool2d(2),
                                            double_convolution(
                                                               input_channels = input_channels,
                                                               mid_channels = output_channels,
                                                               output_channels = output_channels,
                                                               kernel_size = kernel_size,
                                                               bias = bias,
                                                               activation = activation
                                                              )
                                           )

forward(x)

Forward model.

Parameters:

  • x (torch.tensor) –
             Input data.

Returns:

  • result (torch.tensor) –
             Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     Input data.



    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.maxpool_conv(x)
    return result
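
Example: a minimal usage sketch (illustrative shapes).

import torch
from odak.learn.models import downsample_layer

down = downsample_layer(input_channels = 16, output_channels = 32)
x = torch.randn(1, 16, 64, 64)
y = down(x)     # MaxPool2d(2) halves height and width before the double convolution
print(y.shape)  # torch.Size([1, 32, 32, 32])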

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """
    def __init__(
                 self,
                 input_channels,
                 mid_channels,
                 output_channels,
                 kernel_size,
                 bias = False,
                 normalization = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.global_features_2 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.transformations_2 = global_transformations(input_channels, output_channels)


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels (int) –
              Number of input channels.

  • mid_channels (int) –
              Number of mid channels.

  • output_channels (int) –
              Number of output channels.

  • kernel_size (int) –
              Kernel size.

  • bias (bool, default: False) –
              Set to True to let convolutional layers have a bias term.

  • normalization (bool, default: False) –
              If True, adds a Batch Normalization layer after the convolutional layer.

  • activation (torch.nn, default: torch.nn.ReLU()) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             mid_channels,
             output_channels,
             kernel_size,
             bias = False,
             normalization = False,
             activation = torch.nn.ReLU()
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.global_features_2 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1 (torch.tensor) –
             First input data.

  • x2 (torch.tensor) –
             Second input data.

Returns:

  • result (torch.tensor) –
             Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2
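
Example: a minimal usage sketch. As wired above, the internal double convolutions and transformations appear to expect input_channels and output_channels to match, so equal channel counts are used here.

import torch
from odak.learn.models import global_feature_module

module = global_feature_module(
                               input_channels = 8,
                               mid_channels = 8,
                               output_channels = 8,
                               kernel_size = 3
                              )
x1 = torch.randn(1, 8, 32, 32)  # features to be modulated
x2 = torch.randn(1, 8, 32, 32)  # features supplying the global statistics
y = module(x1, x2)
assert y.shape == x1.shape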

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies learned transformations to another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True)
        )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        y = torch.mean(x2, dim = (2, 3))
        y1 = self.global_feature_1(y)
        y2 = self.global_feature_2(y1)
        y1 = y1.unsqueeze(2).unsqueeze(3)
        y2 = y2.unsqueeze(2).unsqueeze(3)
        result = x1 * y1 + y2
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels (int) –
              Number of input channels.

  • output_channels (int) –
              Number of output channels.
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True),
    )
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True)
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1 (torch.tensor) –
             First input data.

  • x2 (torch.tensor) –
             Second input data.

Returns:

  • result (torch.tensor) –
             Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    y = torch.mean(x2, dim = (2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    result = x1 * y1 + y2
    return result
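
Example: a minimal usage sketch. x1 is scaled and shifted per channel using global (spatially averaged) statistics of x2.

import torch
from odak.learn.models import global_transformations

transform = global_transformations(input_channels = 8, output_channels = 8)
x1 = torch.randn(1, 8, 32, 32)
x2 = torch.randn(1, 8, 32, 32)
y = transform(x1, x2)       # result = x1 * y1 + y2, with y1 and y2 derived from mean(x2)
assert y.shape == x1.shape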

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(self,
                 dimensions,
                 activation = torch.nn.ReLU(),
                 bias = False,
                 model_type = 'conventional',
                 siren_multiplier = 1.,
                 input_multiplier = None
                ):
        """
        Parameters
        ----------
        dimensions        : list
                            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and the last one has one channel).
        activation        : torch.nn
                            Nonlinear activation function.
                            Default is `torch.nn.ReLU()`.
        bias              : bool
                            If set to True, linear layers will include biases.
        siren_multiplier  : float
                            When using `SIREN` model type, this parameter functions as a hyperparameter.
                            The original SIREN work uses 30.
                            You can bypass this parameter by providing inputs that are not normalized and larger than one.
        input_multiplier  : float
                            Initial value of the input multiplier before the very first layer.
        model_type        : str
                            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                            `conventional` refers to a standard multi-layer perceptron.
                            For `SIREN,` see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
                            For `Swish,` see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). 
                            For `FILM SIREN,` see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
                            For `Gaussian,` see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        for i in range(len(self.dimensions) - 1):
            self.layers.append(torch.nn.Linear(self.dimensions[i], self.dimensions[i + 1], bias = self.bias))
        if not isinstance(input_multiplier, type(None)):
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier))
        if self.model_type == 'FILM SIREN':
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1:-1]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
        if self.model_type == 'Gaussian':
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1:-1]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        if hasattr(self, 'input_multiplier'):
            result = x * self.input_multiplier[0]
        else:
            result = x
        for layer_id, layer in enumerate(self.layers[:-1]):
            result = layer(result)
            if self.model_type == 'conventional':
                result = self.activation(result)
            elif self.model_type == 'swish':
                result = swish(result)
            elif self.model_type == 'SIREN':
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == 'FILM SIREN':
                result = torch.sin(self.alpha[layer_id][0] * result + self.alpha[layer_id][1])
            elif self.model_type == 'Gaussian': 
                result = gaussian(result, self.alpha[layer_id][0])
        result = self.layers[-1](result)
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Parameters:

  • dimensions (list) –
                List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and the last one has one channel).

  • activation (torch.nn, default: torch.nn.ReLU()) –
                Nonlinear activation function.
                Default is `torch.nn.ReLU()`.

  • bias (bool, default: False) –
                If set to True, linear layers will include biases.

  • siren_multiplier (float, default: 1.0) –
                When using the `SIREN` model type, this parameter functions as a hyperparameter.
                The original SIREN work uses 30.
                You can bypass this parameter by providing inputs that are not normalized and larger than one.

  • input_multiplier (float, default: None) –
                Initial value of the input multiplier before the very first layer.

  • model_type (str, default: 'conventional') –
                Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                `conventional` refers to a standard multi-layer perceptron.
                For `SIREN`, see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in Neural Information Processing Systems 33 (2020): 7462-7473.
                For `swish`, see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017).
                For `FILM SIREN`, see: Chan, Eric R., et al. "pi-GAN: Periodic implicit generative adversarial networks for 3D-aware image synthesis." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2021.
                For `Gaussian`, see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-MLPs." European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
Source code in odak/learn/models/models.py
def __init__(self,
             dimensions,
             activation = torch.nn.ReLU(),
             bias = False,
             model_type = 'conventional',
             siren_multiplier = 1.,
             input_multiplier = None
            ):
    """
    Parameters
    ----------
    dimensions        : list
                        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and the last one has one channel).
    activation        : torch.nn
                        Nonlinear activation function.
                        Default is `torch.nn.ReLU()`.
    bias              : bool
                        If set to True, linear layers will include biases.
    siren_multiplier  : float
                        When using `SIREN` model type, this parameter functions as a hyperparameter.
                        The original SIREN work uses 30.
                        You can bypass this parameter by providing inputs that are not normalized and larger than one.
    input_multiplier  : float
                        Initial value of the input multiplier before the very first layer.
    model_type        : str
                        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                        `conventional` refers to a standard multi-layer perceptron.
                        For `SIREN,` see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
                        For `Swish,` see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). 
                        For `FILM SIREN,` see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
                        For `Gaussian,` see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    for i in range(len(self.dimensions) - 1):
        self.layers.append(torch.nn.Linear(self.dimensions[i], self.dimensions[i + 1], bias = self.bias))
    if not isinstance(input_multiplier, type(None)):
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier))
    if self.model_type == 'FILM SIREN':
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1:-1]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
    if self.model_type == 'Gaussian':
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1:-1]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))

forward(x)

Forward model.

Parameters:

  • x (torch.tensor) –
            Input data.

Returns:

  • result (torch.tensor) –
            Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    if hasattr(self, 'input_multiplier'):
        result = x * self.input_multiplier[0]
    else:
        result = x
    for layer_id, layer in enumerate(self.layers[:-1]):
        result = layer(result)
        if self.model_type == 'conventional':
            result = self.activation(result)
        elif self.model_type == 'swish':
            result = swish(result)
        elif self.model_type == 'SIREN':
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == 'FILM SIREN':
            result = torch.sin(self.alpha[layer_id][0] * result + self.alpha[layer_id][1])
        elif self.model_type == 'Gaussian': 
            result = gaussian(result, self.alpha[layer_id][0])
    result = self.layers[-1](result)
    return result
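
Example: a minimal sketch of a SIREN-style coordinate network (illustrative dimensions; the multiplier of 30 follows the SIREN reference cited above).

import torch
from odak.learn.models import multi_layer_perceptron

model = multi_layer_perceptron(
                               dimensions = [2, 64, 64, 1],
                               model_type = 'SIREN',
                               siren_multiplier = 30.
                              )
coordinates = torch.rand(100, 2)  # normalized 2D input coordinates
values = model(coordinates)       # estimated outputs with shape [100, 1]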

non_local_layer

Bases: Module

Self-Attention Layer [z_i = W_z y_i + x_i] (non-local block; ref: https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [z_i = W_z y_i + x_i] (non-local block; ref: https://arxiv.org/abs/1711.07971)
    """
    def __init__(
                 self,
                 input_channels = 1024,
                 bottleneck_channels = 512,
                 kernel_size = 1,
                 bias = False,
                ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool 
                              Set to True to let convolutional layers have a bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        self.g = torch.nn.Conv2d(
                                 self.input_channels, 
                                 self.bottleneck_channels,
                                 kernel_size = kernel_size,
                                 padding = kernel_size // 2,
                                 bias = bias
                                )
        self.W_z = torch.nn.Sequential(
                                       torch.nn.Conv2d(
                                                       self.bottleneck_channels,
                                                       self.input_channels, 
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       padding = kernel_size // 2
                                                      ),
                                       torch.nn.BatchNorm2d(self.input_channels)
                                      )
        torch.nn.init.constant_(self.W_z[1].weight, 0)   
        torch.nn.init.constant_(self.W_z[1].bias, 0)


    def forward(self, x):
        """
        Forward model [z_i = W_z y_i + x_i]

        Parameters
        ----------
        x               : torch.tensor
                          Input data.


        Returns
        ----------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
        W_y = self.W_z(y)
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels (int, default: 1024) –
                  Number of input channels.

  • bottleneck_channels (int, default: 512) –
                  Number of middle channels.

  • kernel_size (int, default: 1) –
                  Kernel size.

  • bias (bool, default: False) –
                  Set to True to let convolutional layers have a bias term.
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 1024,
             bottleneck_channels = 512,
             kernel_size = 1,
             bias = False,
            ):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool 
                          Set to True to let convolutional layers have a bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    self.g = torch.nn.Conv2d(
                             self.input_channels, 
                             self.bottleneck_channels,
                             kernel_size = kernel_size,
                             padding = kernel_size // 2,
                             bias = bias
                            )
    self.W_z = torch.nn.Sequential(
                                   torch.nn.Conv2d(
                                                   self.bottleneck_channels,
                                                   self.input_channels, 
                                                   kernel_size = kernel_size,
                                                   bias = bias,
                                                   padding = kernel_size // 2
                                                  ),
                                   torch.nn.BatchNorm2d(self.input_channels)
                                  )
    torch.nn.init.constant_(self.W_z[1].weight, 0)   
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [z_i = W_z y_i + x_i]

Parameters:

  • x (torch.tensor) –
              Input data.

Returns:

  • z (torch.tensor) –
              Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [z_i = W_z y_i + x_i]

    Parameters
    ----------
    x               : torch.tensor
                      Input data.


    Returns
    ----------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
    W_y = self.W_z(y)
    z = W_y + x
    return z
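
A minimal usage sketch (the import path and shapes below are assumptions for illustration). The layer preserves the input shape by construction, and because the batch normalization inside W_z is zero-initialized, it initially behaves as an identity mapping:

import torch
from odak.learn.models import non_local_layer  # assumed import path

layer = non_local_layer(input_channels = 64, bottleneck_channels = 32)
x = torch.randn(1, 64, 16, 16)
z = layer(x)
print(z.shape)  # torch.Size([1, 64, 16, 16]), same as the input since z = W_y + x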

normalization

Bases: Module

A normalization layer that standardizes its input along the channel axis and applies a learned per-channel scale.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """
    def __init__(
                 self,
                 dim = 1,
                ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Dimension (axis) to normalize.
        """
        super().__init__()
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
        var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
        mean = torch.mean(x, dim = 1, keepdim = True)
        result =  (x - mean) * (var + eps).rsqrt() * self.k
        return result 

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Dimension (axis) to normalize.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             dim = 1,
            ):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Dimension (axis) to normalize.
    """
    super().__init__()
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
    mean = torch.mean(x, dim = 1, keepdim = True)
    result =  (x - mean) * (var + eps).rsqrt() * self.k
    return result 
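
A minimal usage sketch (the import path is an assumption; dim must match the channel count of the input, since the learned scale k has shape (1, dim, 1, 1)):

import torch
from odak.learn.models import normalization  # assumed import path

norm = normalization(dim = 8)
x = torch.randn(4, 8, 32, 32)
y = norm(x)  # standardized along the channel axis, then scaled by the learned k
print(y.shape)  # torch.Size([4, 8, 32, 32])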

positional_encoder

Bases: Module

A positional encoder module that augments each input coordinate with sine and cosine features at L frequency octaves.

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super(positional_encoder, self).__init__()
        self.L = L


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data.

        Returns
        ----------
        result          : torch.tensor
                          Result of the forward operation
        """
        B, C = x.shape
        x = x.view(B, C, 1)
        results = [x]
        for i in range(1, self.L + 1):
            freq = (2 ** i) * torch.pi
            cos_x = torch.cos(freq * x)
            sin_x = torch.sin(freq * x)
            results.append(cos_x)
            results.append(sin_x)
        results = torch.cat(results, dim=2)
        results = results.permute(0, 2, 1)
        results = results.reshape(B, -1)
        return results

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data.

    Returns
    ----------
    result          : torch.tensor
                      Result of the forward operation
    """
    B, C = x.shape
    x = x.view(B, C, 1)
    results = [x]
    for i in range(1, self.L + 1):
        freq = (2 ** i) * torch.pi
        cos_x = torch.cos(freq * x)
        sin_x = torch.sin(freq * x)
        results.append(cos_x)
        results.append(sin_x)
    results = torch.cat(results, dim=2)
    results = results.permute(0, 2, 1)
    results = results.reshape(B, -1)
    return results
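
A minimal usage sketch (the import path is an assumption). For an input of shape (B, C), the output has shape (B, C * (2 * L + 1)): the raw coordinates plus one sin/cos pair per level:

import torch
from odak.learn.models import positional_encoder  # assumed import path

encode = positional_encoder(L = 4)
x = torch.rand(10, 3)  # ten samples with three coordinates each
y = encode(x)
print(y.shape)  # torch.Size([10, 27]), i.e., 3 * (2 * 4 + 1)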

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 1,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int, optional
                          Number of input channels.
        output_channels : int, optional
                          Number of middle channels.
        kernel_size     : int, optional
                          Kernel size.
        bias            : bool, optional
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn, optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution0 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.convolution1 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.final_layer = torch.nn.Sequential(
                                               self.activation,
                                               torch.nn.Conv2d(
                                                               output_channels,
                                                               output_channels,
                                                               kernel_size = kernel_size,
                                                               padding = kernel_size // 2,
                                                               bias = bias
                                                              )
                                              )


    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        y0 = self.convolution0(x0)
        y1 = self.convolution1(x1)
        y2 = torch.add(y0, y1)
        result = self.final_layer(y2) * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels (int, default: 2 ) –
              Number of input channels.

  • output_channels (int, default: 2 ) –
              Number of middle channels.

  • kernel_size (int, default: 1 ) –
              Kernel size.

  • bias (bool, default: False ) –
              Set to True to let convolutional layers have a bias term.

  • activation (torch.nn, default: torch.nn.ReLU() ) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().

Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 1,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int, optional
                      Number of input channels.
    output_channels : int, optional
                      Number of middle channels.
    kernel_size     : int, optional
                      Kernel size.
    bias            : bool, optional
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn, optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution0 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.convolution1 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.final_layer = torch.nn.Sequential(
                                           self.activation,
                                           torch.nn.Conv2d(
                                                           output_channels,
                                                           output_channels,
                                                           kernel_size = kernel_size,
                                                           padding = kernel_size // 2,
                                                           bias = bias
                                                          )
                                          )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    y0 = self.convolution0(x0)
    y1 = self.convolution1(x1)
    y2 = torch.add(y0, y1)
    result = self.final_layer(y2) * x0
    return result
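
A minimal usage sketch (the import path and shapes are assumptions). Both inputs must share the same shape, and output_channels must match input_channels so that the attention mask can rescale x0 elementwise:

import torch
from odak.learn.models import residual_attention_layer  # assumed import path

layer = residual_attention_layer(input_channels = 2, output_channels = 2)
x0 = torch.randn(1, 2, 32, 32)
x1 = torch.randn(1, 2, 32, 32)
y = layer(x0, x1)  # attention mask derived from both inputs rescales x0
print(y.shape)  # torch.Size([1, 2, 32, 32])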

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = 16,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution = double_convolution(
                                              input_channels,
                                              mid_channels = mid_channels,
                                              output_channels = input_channels,
                                              kernel_size = kernel_size,
                                              bias = bias,
                                              activation = activation
                                             )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        x0 = self.convolution(x)
        return x + x0

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels (int, default: 2 ) –
              Number of input channels.

  • mid_channels (int, default: 16 ) –
              Number of middle channels.

  • kernel_size (int, default: 3 ) –
              Kernel size.

  • bias (bool, default: False ) –
              Set to True to let convolutional layers have a bias term.

  • activation (torch.nn, default: torch.nn.ReLU() ) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().

Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = 16,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution = double_convolution(
                                          input_channels,
                                          mid_channels = mid_channels,
                                          output_channels = input_channels,
                                          kernel_size = kernel_size,
                                          bias = bias,
                                          activation = activation
                                         )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    x0 = self.convolution(x)
    return x + x0
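
A minimal usage sketch (the import path is an assumption). The output shape always matches the input, since the block computes x + double_convolution(x):

import torch
from odak.learn.models import residual_layer  # assumed import path

layer = residual_layer(input_channels = 2, mid_channels = 16, kernel_size = 3)
x = torch.randn(1, 2, 64, 64)
y = layer(x)
print(y.shape)  # torch.Size([1, 2, 64, 64])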

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """
    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        kernel_size = 7
        self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())


    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        max_pool = torch.max(x, 1)[0].unsqueeze(1)
        avg_pool = torch.mean(x, 1).unsqueeze(1)
        output = torch.cat((max_pool, avg_pool), dim=1)
        return output


    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        x_compress = self.channel_pool(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)
        scaled_x = x * scale
        return scaled_x

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    kernel_size = 7
    self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor.
    """
    max_pool = torch.max(x, 1)[0].unsqueeze(1)
    avg_pool = torch.mean(x, 1).unsqueeze(1)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    x_compress = self.channel_pool(x)
    x_out = self.spatial(x_compress)
    scale = torch.sigmoid(x_out)
    scaled_x = x * scale
    return scaled_x
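
A minimal usage sketch (the import path is an assumption). The gate works for any channel count, since channel pooling always reduces the input to two maps before the fixed 7x7 convolution:

import torch
from odak.learn.models import spatial_gate  # assumed import path

gate = spatial_gate()
x = torch.randn(1, 8, 32, 32)
y = gate(x)  # x rescaled by a sigmoid spatial attention map
print(y.shape)  # torch.Size([1, 8, 32, 32])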

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Resize weight to match the input channels and kernel size
        si_kernel = self.weight.reshape(
                                        self.output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply spatially varying kernels
        sv_feature = input_feature * sv_kernel

        # Perform matrix multiplication
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels (int, default: 2 ) –
              Number of input channels.

  • output_channels (int, default: 2 ) –
              Number of output channels.

  • kernel_size (int, default: 3 ) –
              Size of the convolution kernel.

  • stride (int, default: 1 ) –
              Stride of the convolution.

  • padding (int, default: 1 ) –
              Padding added to both sides of the input.

  • bias (bool, default: False ) –
              If True, includes a bias term in the convolution.

  • activation (torch.nn.Module, default: torch.nn.LeakyReLU(0.2, inplace=True) ) –
              Activation function to apply. If None, no activation is applied.

Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Resize weight to match the input channels and kernel size
    si_kernel = self.weight.reshape(
                                    self.output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply spatially varying kernels
    sv_feature = input_feature * sv_kernel

    # Perform matrix multiplication
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )
    return sa_output
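
A minimal usage sketch (the import path and shapes are assumptions). The spatially varying kernel features must supply input_channels * kernel_size * kernel_size values per output pixel:

import torch
from odak.learn.models import spatially_adaptive_convolution  # assumed import path

layer = spatially_adaptive_convolution(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 64, 64)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 64, 64)  # (1, C_i * kernel_size * kernel_size, H, W)
y = layer(x, sv_kernel_feature)
print(y.shape)  # torch.Size([1, 2, 64, 64])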

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.weight_output_channels = self.output_channels - 1
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.weight_output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Apply sv_kernel to the input_feature
        sv_feature = input_feature * sv_kernel

        # Original spatially varying convolution output
        sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                            1,
                                                            1,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

        # Reshape weight for spatially adaptive convolution
        si_kernel = self.weight.reshape(
                                        self.weight_output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply si_kernel on sv convolution output
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.weight_output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )

        # Combine the outputs and apply activation function
        output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels (int, default: 2 ) –
              Number of input channels.

  • output_channels (int, default: 2 ) –
              Number of output channels.

  • kernel_size (int, default: 3 ) –
              Size of the convolution kernel.

  • stride (int, default: 1 ) –
              Stride of the convolution.

  • padding (int, default: 1 ) –
              Padding added to both sides of the input.

  • bias (bool, default: False ) –
              If True, includes a bias term in the convolution.

  • activation (torch.nn, default: torch.nn.LeakyReLU(0.2, inplace=True) ) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().

Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.weight_output_channels = self.output_channels - 1
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.weight_output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Apply sv_kernel to the input_feature
    sv_feature = input_feature * sv_kernel

    # Original spatially varying convolution output
    sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                        1,
                                                        1,
                                                        (x.size(-2) // self.stride),
                                                        (x.size(-1) // self.stride)
                                                       )

    # Reshape weight for spatially adaptive convolution
    si_kernel = self.weight.reshape(
                                    self.weight_output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply si_kernel on sv convolution output
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.weight_output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

    # Combine the outputs and apply activation function
    output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
    return output
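
A minimal usage sketch (the import path and shapes are assumptions). With output_channels = 2, one output channel comes from the purely spatially varying convolution (sv_output) and the remaining output_channels - 1 channels come from the spatially adaptive branch (sa_output):

import torch
from odak.learn.models import spatially_adaptive_module  # assumed import path

module = spatially_adaptive_module(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 64, 64)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 64, 64)  # (1, C_i * kernel_size * kernel_size, H, W)
y = module(x, sv_kernel_feature)
print(y.shape)  # torch.Size([1, 2, 64, 64])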

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """
    def __init__(
                 self,
                 depth=3,
                 dimensions=8,
                 input_channels=6,
                 out_channels=6,
                 kernel_size=3,
                 bias=True,
                 normalization=False,
                 activation=torch.nn.LeakyReLU(0.2, inplace=True)
                ):
        """
        U-Net model.

        Parameters
        ----------
        depth          : int
                         Number of upsampling and downsampling layers.
        dimensions     : int
                         Number of feature channels at the first scale (doubled at each downsampling).
        input_channels : int
                         Number of input channels.
        out_channels   : int
                         Number of output channels.
        kernel_size    : int
                         Convolution kernel size.
        bias           : bool
                         Set to True to let convolutional layers learn a bias term.
        normalization  : bool
                         If True, adds a Batch Normalization layer after the convolutional layer.
        activation     : torch.nn
                         Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        self.inc = convolution_layer(
                                     input_channels=input_channels,
                                     output_channels=dimensions,
                                     kernel_size=kernel_size,
                                     bias=bias,
                                     normalization=normalization,
                                     activation=activation
                                    )

        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2 ** i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                                                          input_channels=down_in_channels,
                                                          mid_channels=down_in_channels,
                                                          output_channels=down_in_channels,
                                                          kernel_size=kernel_size,
                                                          bias=bias,
                                                          normalization=normalization,
                                                          activation=activation
                                                         )
            sam = spatially_adaptive_module(
                                            input_channels=down_in_channels,
                                            output_channels=down_out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            activation=activation
                                           )
            self.encoder.append(torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam]))
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
                                                      input_channels=dimensions * (2 ** (depth + 1)),
                                                      mid_channels=dimensions * (2 ** (depth + 1)),
                                                      output_channels=dimensions * (2 ** (depth + 1)),
                                                      kernel_size=kernel_size,
                                                      bias=bias,
                                                      normalization=normalization,
                                                      activation=activation
                                                     )
        global_feature_layer = global_feature_module(
                                                     input_channels=dimensions * (2 ** (depth + 1)),
                                                     mid_channels=dimensions * (2 ** (depth + 1)),
                                                     output_channels=dimensions * (2 ** (depth + 1)),
                                                     kernel_size=kernel_size,
                                                     bias=bias,
                                                     activation=torch.nn.LeakyReLU(0.2, inplace=True)
                                                    )
        self.global_feature_module.append(torch.nn.ModuleList([double_convolution_layer, global_feature_layer]))
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                                                                input_channels=up_in_channels,
                                                                output_channels=up_mid_channels,
                                                                kernel_size=2,
                                                                stride=2,
                                                                bias=bias,
                                                               )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                                      input_channels=up_mid_channels,
                                      output_channels=up_mid_channels,
                                      kernel_size=kernel_size,
                                      bias=bias,
                                      normalization=normalization,
                                      activation=activation,
                                     ),
                    convolution_layer(
                                      input_channels=up_mid_channels,
                                      output_channels=up_out_channels,
                                      kernel_size=1,
                                      bias=bias,
                                      normalization=normalization,
                                      activation=None,
                                     )
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                                                                input_channels=up_in_channels,
                                                                output_channels=up_mid_channels,
                                                                kernel_size=2,
                                                                stride=2,
                                                                bias=bias,
                                                               )
                conv_layer = double_convolution(
                                                input_channels=up_mid_channels,
                                                mid_channels=up_mid_channels,
                                                output_channels=up_out_channels,
                                                kernel_size=kernel_size,
                                                bias=bias,
                                                normalization=normalization,
                                                activation=activation,
                                               )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))


    def forward(self, sv_kernel, field):
        """
        Forward model.

        Parameters
        ----------
        sv_kernel : list of torch.tensor
                    Learned spatially varying kernels.
                    Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                    where C_i, H_i, and W_i represent the channel, height, and width
                    of each feature at a certain scale.

        field     : torch.tensor
                    Input field data.
                    Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.tensor
                       Estimated output.
                       Dimension: (1, 6, H, W)
        """
        x = self.inc(field)
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            sam_output = down_layer[2](x_down + down_layer[1](x_down), sv_kernel[self.depth - i])
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](downsampling_outputs[-1], global_feature)
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

U-Net model.

Parameters:

  • depth
             Number of upsampling and downsampling layers.
    
  • dimensions
             Base number of feature channels in the network.
    
  • input_channels (int, default: 6 ) –
             Number of input channels.
    
  • out_channels
             Number of output channels.
    
  • kernel_size (int, default: 3 ) –
             Kernel size of the convolutional layers.
    
  • bias
             Set to True to let convolutional layers learn a bias term.
    
  • normalization
             If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
             Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    
Source code in odak/learn/models/models.py
def __init__(
             self,
             depth=3,
             dimensions=8,
             input_channels=6,
             out_channels=6,
             kernel_size=3,
             bias=True,
             normalization=False,
             activation=torch.nn.LeakyReLU(0.2, inplace=True)
            ):
    """
    U-Net model.

    Parameters
    ----------
    depth          : int
                     Number of upsampling and downsampling layers.
    dimensions     : int
                     Base number of feature channels in the network.
    input_channels : int
                     Number of input channels.
    out_channels   : int
                     Number of output channels.
    kernel_size    : int
                     Kernel size of the convolutional layers.
    bias           : bool
                     Set to True to let convolutional layers learn a bias term.
    normalization  : bool
                     If True, adds a Batch Normalization layer after the convolutional layer.
    activation     : torch.nn
                     Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    self.inc = convolution_layer(
                                 input_channels=input_channels,
                                 output_channels=dimensions,
                                 kernel_size=kernel_size,
                                 bias=bias,
                                 normalization=normalization,
                                 activation=activation
                                )

    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2 ** i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
                                                      input_channels=down_in_channels,
                                                      mid_channels=down_in_channels,
                                                      output_channels=down_in_channels,
                                                      kernel_size=kernel_size,
                                                      bias=bias,
                                                      normalization=normalization,
                                                      activation=activation
                                                     )
        sam = spatially_adaptive_module(
                                        input_channels=down_in_channels,
                                        output_channels=down_out_channels,
                                        kernel_size=kernel_size,
                                        bias=bias,
                                        activation=activation
                                       )
        self.encoder.append(torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam]))
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
                                                  input_channels=dimensions * (2 ** (depth + 1)),
                                                  mid_channels=dimensions * (2 ** (depth + 1)),
                                                  output_channels=dimensions * (2 ** (depth + 1)),
                                                  kernel_size=kernel_size,
                                                  bias=bias,
                                                  normalization=normalization,
                                                  activation=activation
                                                 )
    global_feature_layer = global_feature_module(
                                                 input_channels=dimensions * (2 ** (depth + 1)),
                                                 mid_channels=dimensions * (2 ** (depth + 1)),
                                                 output_channels=dimensions * (2 ** (depth + 1)),
                                                 kernel_size=kernel_size,
                                                 bias=bias,
                                                 activation=torch.nn.LeakyReLU(0.2, inplace=True)
                                                )
    self.global_feature_module.append(torch.nn.ModuleList([double_convolution_layer, global_feature_layer]))
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels=up_in_channels,
                                                            output_channels=up_mid_channels,
                                                            kernel_size=2,
                                                            stride=2,
                                                            bias=bias,
                                                           )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                                  input_channels=up_mid_channels,
                                  output_channels=up_mid_channels,
                                  kernel_size=kernel_size,
                                  bias=bias,
                                  normalization=normalization,
                                  activation=activation,
                                 ),
                convolution_layer(
                                  input_channels=up_mid_channels,
                                  output_channels=up_out_channels,
                                  kernel_size=1,
                                  bias=bias,
                                  normalization=normalization,
                                  activation=None,
                                 )
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels=up_in_channels,
                                                            output_channels=up_mid_channels,
                                                            kernel_size=2,
                                                            stride=2,
                                                            bias=bias,
                                                           )
            conv_layer = double_convolution(
                                            input_channels=up_mid_channels,
                                            mid_channels=up_mid_channels,
                                            output_channels=up_out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            normalization=normalization,
                                            activation=activation,
                                           )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))

forward(sv_kernel, field)

Forward model.

Parameters:

  • sv_kernel (list of torch.tensor) –
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    
  • field
        Input field data.
        Dimension: (1, 6, H, W)
    

Returns:

  • target_field ( tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Forward model.

    Parameters
    ----------
    sv_kernel : list of torch.tensor
                Learned spatially varying kernels.
                Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                where C_i, H_i, and W_i represent the channel, height, and width
                of each feature at a certain scale.

    field     : torch.tensor
                Input field data.
                Dimension: (1, 6, H, W)

    Returns
    -------
    target_field : torch.tensor
                   Estimated output.
                   Dimension: (1, 6, H, W)
    """
    x = self.inc(field)
    downsampling_outputs = [x]
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer[0](downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
        sam_output = down_layer[2](x_down + down_layer[1](x_down), sv_kernel[self.depth - i])
        downsampling_outputs.append(sam_output)
    global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
    global_feature = self.global_feature_module[0][1](downsampling_outputs[-1], global_feature)
    downsampling_outputs.append(global_feature)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate(self.decoder):
        x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
        x_up = up_layer[1](x_up)
    result = x_up
    return result
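
Example: a minimal end-to-end sketch pairing this forward model with the spatially_varying_kernel_generation_model documented below, which produces the sv_kernel list. The import path odak.learn.models and the class name spatially_adaptive_unet are assumptions here; adjust them to match your installation.

import torch
from odak.learn.models import spatially_adaptive_unet, spatially_varying_kernel_generation_model  # assumed import path

H = W = 256  # H and W should be divisible by 2 ** (depth + 1)
focal_surface = torch.randn(1, 1, H, W)
field = torch.randn(1, 6, H, W)
kernel_generator = spatially_varying_kernel_generation_model(depth = 3, dimensions = 8)
model = spatially_adaptive_unet(depth = 3, dimensions = 8)
sv_kernel = kernel_generator(focal_surface, field)  # list of spatially varying kernels, one per scale
target_field = model(sv_kernel, field)              # estimated output, (1, 6, H, W)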

spatially_varying_kernel_generation_model

Bases: Module

A spatially varying kernel generation model, revised from RSGUNet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    Spatially_varying_kernel_generation_model revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al. Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices.
    """

    def __init__(
                 self,
                 depth = 3,
                 dimensions = 8,
                 input_channels = 7,
                 kernel_size = 3,
                 bias = True,
                 normalization = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Spatially varying kernel generation model.

        Parameters
        ----------
        depth          : int
                         Number of upsampling and downsampling layers.
        dimensions     : int
                         Base number of feature channels in the network.
        input_channels : int
                         Number of input channels.
        kernel_size    : int
                         Kernel size of the convolutional layers.
        bias           : bool
                         Set to True to let convolutional layers learn a bias term.
        normalization  : bool
                         If True, adds a Batch Normalization layer after the convolutional layer.
        activation     : torch.nn
                         Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super().__init__()
        self.depth = depth
        self.inc = convolution_layer(
                                     input_channels = input_channels,
                                     output_channels = dimensions,
                                     kernel_size = kernel_size,
                                     bias = bias,
                                     normalization = normalization,
                                     activation = activation
                                    )
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2 ** i)
                out_channels = dimensions * (2 ** i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                                                          input_channels = in_channels,
                                                          mid_channels = in_channels,
                                                          output_channels = out_channels,
                                                          kernel_size = kernel_size,
                                                          bias = bias,
                                                          normalization = normalization,
                                                          activation = activation
                                                         )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                                in_channels = svf_in_channels,
                                out_channels = svf_mid_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
                activation,
                torch.nn.Conv2d(
                                in_channels = svf_mid_channels,
                                out_channels = svf_mid_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
                activation,
                torch.nn.Conv2d(
                                in_channels = svf_mid_channels,
                                out_channels = svf_out_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
                                                     input_channels = dimensions * (2 ** (depth - 1)),
                                                     mid_channels = dimensions * (2 ** (depth - 1)),
                                                     output_channels = dimensions * (2 ** (depth - 1)),
                                                     kernel_size = kernel_size,
                                                     bias = bias,
                                                     activation = torch.nn.LeakyReLU(0.2, inplace = True)
                                                    )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2 ** i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2 ** i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels = up_in_channels,
                                                            output_channels = up_mid_channels,
                                                            kernel_size = 2,
                                                            stride = 2,
                                                            bias = bias,
                                                           )
            conv_layer = double_convolution(
                                            input_channels = up_mid_channels,
                                            output_channels = up_out_channels,
                                            kernel_size = kernel_size,
                                            bias = bias,
                                            normalization = normalization,
                                            activation = activation,
                                           )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))


    def forward(self, focal_surface, field):
        """
        Forward model.

        Parameters
        ----------
        focal_surface : torch.tensor
                        Input focal surface data.
                        Dimension: (1, 1, H, W)

        field         : torch.tensor
                        Input field data.
                        Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.tensor
                    Learned spatially varying kernels.
                    Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                    where C_i, H_i, and W_i represent the channel, height, and width
                    of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim = 1))
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(zip(self.decoder, self.spatially_varying_feature)):
            if i == 0:
                global_feature = up_layer(downsampling_outputs[-2], downsampling_outputs[-1])
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                sv_feature = [sv_feature[0], sv_feature[1], sv_feature[4], sv_feature[2],
                              sv_feature[3]]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
                sv_kernels.append(sv_kernel)
            else:
                x_up = up_layer[0](downsampling_outputs[-1],
                                   downsampling_outputs[2 * (self.depth + 1 - i) + 1])
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    sv_feature = [sv_feature[0], sv_feature[1], sv_feature[3], sv_feature[2]]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Spatially varying kernel generation model.

Parameters:

  • depth
             Number of upsampling and downsampling layers.
    
  • dimensions
             Base number of feature channels in the network.
    
  • input_channels (int, default: 7 ) –
             Number of input channels.
    
  • kernel_size (int, default: 3 ) –
             Kernel size of the convolutional layers.
    
  • bias
             Set to True to let convolutional layers learn a bias term.
    
  • normalization
             If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
             Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    
Source code in odak/learn/models/models.py
def __init__(
             self,
             depth = 3,
             dimensions = 8,
             input_channels = 7,
             kernel_size = 3,
             bias = True,
             normalization = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Spatially varying kernel generation model.

    Parameters
    ----------
    depth          : int
                     Number of upsampling and downsampling layers.
    dimensions     : int
                     Base number of feature channels in the network.
    input_channels : int
                     Number of input channels.
    kernel_size    : int
                     Kernel size of the convolutional layers.
    bias           : bool
                     Set to True to let convolutional layers learn a bias term.
    normalization  : bool
                     If True, adds a Batch Normalization layer after the convolutional layer.
    activation     : torch.nn
                     Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super().__init__()
    self.depth = depth
    self.inc = convolution_layer(
                                 input_channels = input_channels,
                                 output_channels = dimensions,
                                 kernel_size = kernel_size,
                                 bias = bias,
                                 normalization = normalization,
                                 activation = activation
                                )
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2 ** i)
            out_channels = dimensions * (2 ** i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
                                                      input_channels = in_channels,
                                                      mid_channels = in_channels,
                                                      output_channels = out_channels,
                                                      kernel_size = kernel_size,
                                                      bias = bias,
                                                      normalization = normalization,
                                                      activation = activation
                                                     )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                            in_channels = svf_in_channels,
                            out_channels = svf_mid_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
            activation,
            torch.nn.Conv2d(
                            in_channels = svf_mid_channels,
                            out_channels = svf_mid_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
            activation,
            torch.nn.Conv2d(
                            in_channels = svf_mid_channels,
                            out_channels = svf_out_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
                                                 input_channels = dimensions * (2 ** (depth - 1)),
                                                 mid_channels = dimensions * (2 ** (depth - 1)),
                                                 output_channels = dimensions * (2 ** (depth - 1)),
                                                 kernel_size = kernel_size,
                                                 bias = bias,
                                                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                                                )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        if i == 2:
            up_in_channels = (dimensions // 2) * (2 ** i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2 ** i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
                                                        input_channels = up_in_channels,
                                                        output_channels = up_mid_channels,
                                                        kernel_size = 2,
                                                        stride = 2,
                                                        bias = bias,
                                                       )
        conv_layer = double_convolution(
                                        input_channels = up_mid_channels,
                                        output_channels = up_out_channels,
                                        kernel_size = kernel_size,
                                        bias = bias,
                                        normalization = normalization,
                                        activation = activation,
                                       )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))

forward(focal_surface, field)

Forward model.

Parameters:

  • focal_surface (tensor) –
            Input focal surface data.
            Dimension: (1, 1, H, W)
    
  • field
            Input field data.
            Dimension: (1, 6, H, W)
    

Returns:

  • sv_kernel ( list of torch.tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward model.

    Parameters
    ----------
    focal_surface : torch.tensor
                    Input focal surface data.
                    Dimension: (1, 1, H, W)

    field         : torch.tensor
                    Input field data.
                    Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.tensor
                Learned spatially varying kernels.
                Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                where C_i, H_i, and W_i represent the channel, height, and width
                of each feature at a certain scale.
    """
    x = self.inc(torch.cat((focal_surface, field), dim = 1))
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(zip(self.decoder, self.spatially_varying_feature)):
        if i == 0:
            global_feature = up_layer(downsampling_outputs[-2], downsampling_outputs[-1])
            downsampling_outputs[-1] = global_feature
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            sv_feature = [sv_feature[0], sv_feature[1], sv_feature[4], sv_feature[2],
                          sv_feature[3]]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
            sv_kernels.append(sv_kernel)
        else:
            x_up = up_layer[0](downsampling_outputs[-1],
                               downsampling_outputs[2 * (self.depth + 1 - i) + 1])
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            if i == 1:
                sv_feature = [sv_feature[0], sv_feature[1], sv_feature[3], sv_feature[2]]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
            sv_kernels.append(sv_kernel)
    return sv_kernels
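
Example: a minimal sketch inspecting the per-scale kernels this model returns; the kernels are ordered from the coarsest scale (produced alongside the global feature) to the finest. The import path is an assumption.

import torch
from odak.learn.models import spatially_varying_kernel_generation_model  # assumed import path

model = spatially_varying_kernel_generation_model(depth = 3, dimensions = 8, kernel_size = 3)
focal_surface = torch.randn(1, 1, 256, 256)
field = torch.randn(1, 6, 256, 256)
sv_kernels = model(focal_surface, field)
for sv_kernel in sv_kernels:
    print(sv_kernel.shape)  # (1, C_i * 3 * 3, H_i, W_i) at each scale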

unet

Bases: Module

A U-Net model, heavily inspired by https://github.com/milesial/Pytorch-UNet/tree/master/unet; for more details, see Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model, heavily inspired by `https://github.com/milesial/Pytorch-UNet/tree/master/unet`; for more details, see Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.
    """

    def __init__(
                 self, 
                 depth = 4,
                 dimensions = 64, 
                 input_channels = 2, 
                 output_channels = 1, 
                 bilinear = False,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU(inplace = True),
                ):
        """
        U-Net model.

        Parameters
        ----------
        depth             : int
                            Number of upsampling and downsampling layers.
        dimensions        : int
                            Base number of feature channels in the network.
        input_channels    : int
                            Number of input channels.
        output_channels   : int
                            Number of output channels.
        kernel_size       : int
                            Kernel size of the convolutional layers.
        bilinear          : bool
                            Uses bilinear upsampling in upsampling layers when set True.
        bias              : bool
                            Set to True to let convolutional layers learn a bias term.
        activation        : torch.nn
                            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super(unet, self).__init__()
        self.inc = double_convolution(
                                      input_channels = input_channels,
                                      mid_channels = dimensions,
                                      output_channels = dimensions,
                                      kernel_size = kernel_size,
                                      bias = bias,
                                      activation = activation
                                     )      

        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        for i in range(depth): # downsampling layers
            in_channels = dimensions * (2 ** i)
            out_channels = dimensions * (2 ** (i + 1))
            down_layer = downsample_layer(in_channels,
                                            out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            activation=activation
                                            )
            self.downsampling_layers.append(down_layer)      

        for i in range(depth - 1, -1, -1):  # upsampling layers
            up_in_channels = dimensions * (2 ** (i + 1))  
            up_out_channels = dimensions * (2 ** i) 
            up_layer = upsample_layer(up_in_channels, up_out_channels, kernel_size=kernel_size, bias=bias, activation=activation, bilinear=bilinear)
            self.upsampling_layers.append(up_layer)
        self.outc = torch.nn.Conv2d(
                                    dimensions, 
                                    output_channels,
                                    kernel_size = kernel_size,
                                    padding = kernel_size // 2,
                                    bias = bias
                                   )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        downsampling_outputs = [self.inc(x)]
        for down_layer in self.downsampling_layers:
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate((self.upsampling_layers)):
            x_up = up_layer(x_up, downsampling_outputs[-(i + 2)])       
        result = self.outc(x_up)
        return result

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

U-Net model.

Parameters:

  • depth
                Number of upsampling and downsampling layers.
    
  • dimensions
                Base number of feature channels in the network.
    
  • input_channels
                Number of input channels.
    
  • output_channels
                Number of output channels.
    
  • kernel_size
                Kernel size of the convolutional layers.
    
  • bilinear
                Uses bilinear upsampling in upsampling layers when set True.
    
  • bias
                Set to True to let convolutional layers learn a bias term.
    
  • activation
                Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    
Source code in odak/learn/models/models.py
def __init__(
             self, 
             depth = 4,
             dimensions = 64, 
             input_channels = 2, 
             output_channels = 1, 
             bilinear = False,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU(inplace = True),
            ):
    """
    U-Net model.

    Parameters
    ----------
    depth             : int
                        Number of upsampling and downsampling layers.
    dimensions        : int
                        Base number of feature channels in the network.
    input_channels    : int
                        Number of input channels.
    output_channels   : int
                        Number of output channels.
    kernel_size       : int
                        Kernel size of the convolutional layers.
    bilinear          : bool
                        Uses bilinear upsampling in upsampling layers when set True.
    bias              : bool
                        Set to True to let convolutional layers learn a bias term.
    activation        : torch.nn
                        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super(unet, self).__init__()
    self.inc = double_convolution(
                                  input_channels = input_channels,
                                  mid_channels = dimensions,
                                  output_channels = dimensions,
                                  kernel_size = kernel_size,
                                  bias = bias,
                                  activation = activation
                                 )      

    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    for i in range(depth): # downsampling layers
        in_channels = dimensions * (2 ** i)
        out_channels = dimensions * (2 ** (i + 1))
        down_layer = downsample_layer(in_channels,
                                        out_channels,
                                        kernel_size=kernel_size,
                                        bias=bias,
                                        activation=activation
                                        )
        self.downsampling_layers.append(down_layer)      

    for i in range(depth - 1, -1, -1):  # upsampling layers
        up_in_channels = dimensions * (2 ** (i + 1))  
        up_out_channels = dimensions * (2 ** i) 
        up_layer = upsample_layer(up_in_channels, up_out_channels, kernel_size=kernel_size, bias=bias, activation=activation, bilinear=bilinear)
        self.upsampling_layers.append(up_layer)
    self.outc = torch.nn.Conv2d(
                                dimensions, 
                                output_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    downsampling_outputs = [self.inc(x)]
    for down_layer in self.downsampling_layers:
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate((self.upsampling_layers)):
        x_up = up_layer(x_up, downsampling_outputs[-(i + 2)])       
    result = self.outc(x_up)
    return result
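
Example: a minimal usage sketch for this U-Net; the import path odak.learn.models is an assumption.

import torch
from odak.learn.models import unet  # assumed import path

model = unet(depth = 4, dimensions = 64, input_channels = 2, output_channels = 1)
x = torch.randn(1, 2, 128, 128)  # H and W should be divisible by 2 ** depth
y = model(x)
print(y.shape)  # torch.Size([1, 1, 128, 128])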

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 2,
                 stride = 2,
                 bias = False,
                ):
        """
        An upsampling component using a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
                                           in_channels = input_channels,
                                           out_channels = output_channels,
                                           bias = bias,
                                           kernel_size = kernel_size,
                                           stride = stride
                                          )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        result = x1 + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component using a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • stride
              Stride of the transposed convolution.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 2,
             stride = 2,
             bias = False,
            ):
    """
    An upsampling component using a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
                                       in_channels = input_channels,
                                       out_channels = output_channels,
                                       bias = bias,
                                       kernel_size = kernel_size,
                                       stride = stride
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             Feature map to be upsampled.
    
  • x2
             Skip-connection feature map; the upsampled x1 is padded to its size and added to it.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    result = x1 + x2
    return result
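
Example: the shape contract of this layer, as a minimal sketch (the import path is an assumption). x1 is upsampled by the transposed convolution, padded to match x2, and summed with it, so x2 must already carry output_channels channels at the target resolution.

import torch
from odak.learn.models import upsample_convtranspose2d_layer  # assumed import path

layer = upsample_convtranspose2d_layer(input_channels = 64, output_channels = 32)
x1 = torch.randn(1, 64, 16, 16)  # feature map to upsample
x2 = torch.randn(1, 32, 32, 32)  # skip connection at the target resolution
result = layer(x1, x2)           # x1 becomes (1, 32, 32, 32), is padded if needed, then added to x2
print(result.shape)              # torch.Size([1, 32, 32, 32])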

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU(),
                 bilinear = True
                ):
        """
        An upsampling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
            self.conv = double_convolution(
                                           input_channels = input_channels + output_channels,
                                           mid_channels = input_channels // 2,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )
        else:
            self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
            self.conv = double_convolution(
                                           input_channels = input_channels,
                                           mid_channels = output_channels,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """ 
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim = 1)
        result = self.conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU(),
             bilinear = True
            ):
    """
    An upsampling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
        self.conv = double_convolution(
                                       input_channels = input_channels + output_channels,
                                       mid_channels = input_channels // 2,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )
    else:
        self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
        self.conv = double_convolution(
                                       input_channels = input_channels,
                                       mid_channels = output_channels,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             Feature map to be upsampled.
    
  • x2
             Skip-connection feature map concatenated with the upsampled x1.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """ 
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    x = torch.cat([x2, x1], dim = 1)
    result = self.conv(x)
    return result
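
Example: a minimal sketch of the bilinear variant (the import path is an assumption). With bilinear = True, x1 keeps its channel count after upsampling and is concatenated with x2, which is why the inner double convolution expects input_channels + output_channels channels.

import torch
from odak.learn.models import upsample_layer  # assumed import path

layer = upsample_layer(input_channels = 64, output_channels = 32, bilinear = True)
x1 = torch.randn(1, 64, 16, 16)  # feature map to upsample
x2 = torch.randn(1, 32, 32, 32)  # skip connection to concatenate
result = layer(x1, x2)           # upsample, pad, concatenate (96 channels), double convolution
print(result.shape)              # torch.Size([1, 32, 32, 32])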

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier = 1.):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    result = torch.exp(- (multiplier * x) ** 2)
    return result
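
Example: a quick numeric check of the activation, which computes exp(-(multiplier * x) ** 2); larger multipliers narrow the bell (the import path is an assumption).

import torch
from odak.learn.models import gaussian  # assumed import path

x = torch.tensor([-1., 0., 1.])
print(gaussian(x))                   # tensor([0.3679, 1.0000, 0.3679])
print(gaussian(x, multiplier = 2.))  # tensor([0.0183, 1.0000, 0.0183])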

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    -----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    out = x * torch.sigmoid(x)
    return out
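
Example: a quick numeric check of swish(x) = x * sigmoid(x) (the import path is an assumption).

import torch
from odak.learn.models import swish  # assumed import path

x = torch.tensor([-1., 0., 1.])
print(swish(x))  # tensor([-0.2689, 0.0000, 0.7311])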


    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == 'avg':
            pool = torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        elif pool_type == 'max':
            pool = torch.nn.functional.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        channel_att_raw = self.mlp(pool)
        channel_att_sum = channel_att_raw if channel_att_sum is None else channel_att_sum + channel_att_raw
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output
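
A minimal usage sketch, assuming channel_gate is importable from odak.learn.models.components; the shapes follow from the source above:

import torch
from odak.learn.models.components import channel_gate

attention = channel_gate(gate_channels = 32, reduction_ratio = 16)
x = torch.randn(1, 32, 64, 64)
y = attention(x)
# Each channel of x is rescaled by a learned sigmoid weight; the shape is unchanged.
print(y.shape)  # torch.Size([1, 32, 64, 64])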

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 bias = False,
                 stride = 1,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        stride          : int
                          Stride of the convolution.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
        """
        super().__init__()
        layers = [
            torch.nn.Conv2d(
                            input_channels,
                            output_channels,
                            kernel_size = kernel_size,
                            stride = stride,
                            padding = kernel_size // 2,
                            bias = bias
                           )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • stride (int, default: 1 ) –
              Stride of the convolution.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             bias = False,
             stride = 1,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    stride          : int
                      Stride of the convolution.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    """
    super().__init__()
    layers = [
        torch.nn.Conv2d(
                        input_channels,
                        output_channels,
                        kernel_size = kernel_size,
                        stride = stride,
                        padding = kernel_size // 2,
                        bias = bias
                       )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    result = self.model(x)
    return result
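
A minimal usage sketch, assuming convolution_layer is importable from odak.learn.models.components; with the default stride of one and padding of kernel_size // 2, the spatial size is preserved:

import torch
from odak.learn.models.components import convolution_layer

layer = convolution_layer(input_channels = 3, output_channels = 16, kernel_size = 3)
x = torch.randn(1, 3, 32, 32)
y = layer(x)  # Conv2d -> BatchNorm2d -> ReLU with the defaults above
print(y.shape)  # torch.Size([1, 16, 32, 32])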

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class. 
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """
    def __init__(
                 self, 
                 gate_channels, 
                 reduction_ratio = 16, 
                 pool_types = ['avg', 'max'], 
                 no_spatial = False
                ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super(convolutional_block_attention, self).__init__()
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()


    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """
        def forward(self, x):
            return x.view(x.size(0), -1)


    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens the input tensor to a 2D matrix.
    """
    def forward(self, x):
        return x.view(x.size(0), -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
             self, 
             gate_channels, 
             reduction_ratio = 16, 
             pool_types = ['avg', 'max'], 
             no_spatial = False
            ):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if not self.no_spatial:
        x_out = self.spatial_gate(x_out)
    return x_out
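
A minimal usage sketch, assuming convolutional_block_attention is importable from odak.learn.models.components (spatial_gate is defined elsewhere in the same module):

import torch
from odak.learn.models.components import convolutional_block_attention

cbam = convolutional_block_attention(gate_channels = 32, reduction_ratio = 16)
x = torch.randn(1, 32, 64, 64)
y = cbam(x)  # channel attention followed by spatial attention
print(y.shape)  # torch.Size([1, 32, 64, 64])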

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = None,
                 output_channels = 2,
                 kernel_size = 3, 
                 bias = False,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
        """
        super().__init__()
        if isinstance(mid_channels, type(None)):
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
                                         convolution_layer(
                                                           input_channels = input_channels,
                                                           output_channels = mid_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          ),
                                         convolution_layer(
                                                           input_channels = mid_channels,
                                                           output_channels = output_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          )
                                        )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = None,
             output_channels = 2,
             kernel_size = 3, 
             bias = False,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    """
    super().__init__()
    if isinstance(mid_channels, type(None)):
        mid_channels = output_channels
    self.activation = activation
    self.model = torch.nn.Sequential(
                                     convolution_layer(
                                                       input_channels = input_channels,
                                                       output_channels = mid_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      ),
                                     convolution_layer(
                                                       input_channels = mid_channels,
                                                       output_channels = output_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      )
                                    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.model(x)
    return result
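
A minimal usage sketch, assuming double_convolution is importable from odak.learn.models.components:

import torch
from odak.learn.models.components import double_convolution

block = double_convolution(input_channels = 3, mid_channels = 8, output_channels = 16)
x = torch.randn(1, 3, 32, 32)
y = block(x)  # two convolution_layer blocks applied back to back
print(y.shape)  # torch.Size([1, 16, 32, 32])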

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
        """
        super().__init__()
        self.maxpool_conv = torch.nn.Sequential(
                                                torch.nn.MaxPool2d(2),
                                                double_convolution(
                                                                   input_channels = input_channels,
                                                                   mid_channels = output_channels,
                                                                   output_channels = output_channels,
                                                                   kernel_size = kernel_size,
                                                                   bias = bias,
                                                                   activation = activation
                                                                  )
                                               )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         First input data.



        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.maxpool_conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • activation
              Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    """
    super().__init__()
    self.maxpool_conv = torch.nn.Sequential(
                                            torch.nn.MaxPool2d(2),
                                            double_convolution(
                                                               input_channels = input_channels,
                                                               mid_channels = output_channels,
                                                               output_channels = output_channels,
                                                               kernel_size = kernel_size,
                                                               bias = bias,
                                                               activation = activation
                                                              )
                                           )

forward(x)

Forward model.

Parameters:

  • x
             First input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     First input data.



    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.maxpool_conv(x)
    return result
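
A minimal usage sketch, assuming downsample_layer is importable from odak.learn.models.components; MaxPool2d(2) halves the spatial resolution before the double convolution:

import torch
from odak.learn.models.components import downsample_layer

down = downsample_layer(input_channels = 16, output_channels = 32)
x = torch.randn(1, 16, 64, 64)
y = down(x)
print(y.shape)  # torch.Size([1, 32, 32, 32])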

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """
    def __init__(
                 self,
                 input_channels,
                 mid_channels,
                 output_channels,
                 kernel_size,
                 bias = False,
                 normalization = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
        """
        super().__init__()
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.global_features_2 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.transformations_2 = global_transformations(input_channels, output_channels)


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             mid_channels,
             output_channels,
             kernel_size,
             bias = False,
             normalization = False,
             activation = torch.nn.ReLU()
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    """
    super().__init__()
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.global_features_2 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2
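
A minimal usage sketch, assuming global_feature_module is importable from odak.learn.models.components; the channel counts are kept equal here so that the global scaling inside global_transformations broadcasts over x1:

import torch
from odak.learn.models.components import global_feature_module

module = global_feature_module(
                               input_channels = 16,
                               mid_channels = 16,
                               output_channels = 16,
                               kernel_size = 3
                              )
x1 = torch.randn(1, 16, 32, 32)
x2 = torch.randn(1, 16, 32, 32)
y = module(x1, x2)
print(y.shape)  # torch.Size([1, 16, 32, 32])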

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies learned transformations to another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True)
        )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        y = torch.mean(x2, dim = (2, 3))
        y1 = self.global_feature_1(y)
        y2 = self.global_feature_2(y1)
        y1 = y1.unsqueeze(2).unsqueeze(3)
        y2 = y2.unsqueeze(2).unsqueeze(3)
        result = x1 * y1 + y2
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True),
    )
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True)
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    y = torch.mean(x2, dim = (2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    result = x1 * y1 + y2
    return result
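
A minimal usage sketch, assuming global_transformations is importable from odak.learn.models.components; x2 is average pooled into a global descriptor whose two learned projections scale and shift x1:

import torch
from odak.learn.models.components import global_transformations

transform = global_transformations(input_channels = 16, output_channels = 16)
x1 = torch.randn(1, 16, 32, 32)
x2 = torch.randn(1, 16, 32, 32)
y = transform(x1, x2)  # y = x1 * y1 + y2, broadcast over the spatial axes
print(y.shape)  # torch.Size([1, 16, 32, 32])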

non_local_layer

Bases: Module

Self-attention layer [z_i = W_z y_i + x_i] (non-local block; reference: https://arxiv.org/abs/1711.07971).

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [zi = Wzyi + xi] (non-local block : ref https://arxiv.org/abs/1711.07971)
    """
    def __init__(
                 self,
                 input_channels = 1024,
                 bottleneck_channels = 512,
                 kernel_size = 1,
                 bias = False,
                ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool 
                              Set to True to let convolutional layers have a bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        self.g = torch.nn.Conv2d(
                                 self.input_channels, 
                                 self.bottleneck_channels,
                                 kernel_size = kernel_size,
                                 padding = kernel_size // 2,
                                 bias = bias
                                )
        self.W_z = torch.nn.Sequential(
                                       torch.nn.Conv2d(
                                                       self.bottleneck_channels,
                                                       self.input_channels, 
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       padding = kernel_size // 2
                                                      ),
                                       torch.nn.BatchNorm2d(self.input_channels)
                                      )
        torch.nn.init.constant_(self.W_z[1].weight, 0)   
        torch.nn.init.constant_(self.W_z[1].bias, 0)


    def forward(self, x):
        """
        Forward model [zi = Wzyi + xi]

        Parameters
        ----------
        x               : torch.tensor
                          First input data.                       


        Returns
        ----------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
        W_y = self.W_z(y)
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have a bias term.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 1024,
             bottleneck_channels = 512,
             kernel_size = 1,
             bias = False,
            ):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool 
                          Set to True to let convolutional layers have a bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    self.g = torch.nn.Conv2d(
                             self.input_channels, 
                             self.bottleneck_channels,
                             kernel_size = kernel_size,
                             padding = kernel_size // 2,
                             bias = bias
                            )
    self.W_z = torch.nn.Sequential(
                                   torch.nn.Conv2d(
                                                   self.bottleneck_channels,
                                                   self.input_channels, 
                                                   kernel_size = kernel_size,
                                                   bias = bias,
                                                   padding = kernel_size // 2
                                                  ),
                                   torch.nn.BatchNorm2d(self.input_channels)
                                  )
    torch.nn.init.constant_(self.W_z[1].weight, 0)   
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [z_i = W_z y_i + x_i]

Parameters:

  • x
              First input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [zi = Wzyi + xi]

    Parameters
    ----------
    x               : torch.tensor
                      First input data.                       


    Returns
    ----------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
    W_y = self.W_z(y)
    z = W_y + x
    return z
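
A minimal usage sketch, assuming non_local_layer is importable from odak.learn.models.components; the attention matrix has (height * width) ** 2 entries, so small feature maps are advisable:

import torch
from odak.learn.models.components import non_local_layer

block = non_local_layer(input_channels = 64, bottleneck_channels = 32, kernel_size = 1)
x = torch.randn(1, 64, 16, 16)
z = block(x)  # residual output: z = W_z y + x
print(z.shape)  # torch.Size([1, 64, 16, 16])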

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """
    def __init__(
                 self,
                 dim = 1,
                ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Number of channels; sets the size of the learnable per-channel scale. The normalization itself is applied along the channel axis.
        """
        super().__init__()
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
        var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
        mean = torch.mean(x, dim = 1, keepdim = True)
        result =  (x - mean) * (var + eps).rsqrt() * self.k
        return result 

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Number of channels; sets the size of the learnable per-channel scale. The normalization itself is applied along the channel axis.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             dim = 1,
            ):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Number of channels; sets the size of the learnable per-channel scale. The normalization itself is applied along the channel axis.
    """
    super().__init__()
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
    mean = torch.mean(x, dim = 1, keepdim = True)
    result =  (x - mean) * (var + eps).rsqrt() * self.k
    return result 
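
A minimal usage sketch, assuming normalization is importable from odak.learn.models.components; dim should match the channel count of the input:

import torch
from odak.learn.models.components import normalization

norm = normalization(dim = 16)  # one learnable scale per channel
x = torch.randn(1, 16, 32, 32)
y = norm(x)  # standardized along the channel axis, then rescaled by k
print(y.shape)  # torch.Size([1, 16, 32, 32])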

positional_encoder

Bases: Module

A positional encoder module.

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super(positional_encoder, self).__init__()
        self.L = L


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data.

        Returns
        ----------
        result          : torch.tensor
                          Result of the forward operation
        """
        B, C = x.shape
        x = x.view(B, C, 1)
        results = [x]
        for i in range(1, self.L + 1):
            freq = (2 ** i) * torch.pi
            cos_x = torch.cos(freq * x)
            sin_x = torch.sin(freq * x)
            results.append(cos_x)
            results.append(sin_x)
        results = torch.cat(results, dim=2)
        results = results.permute(0, 2, 1)
        results = results.reshape(B, -1)
        return results

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data.

    Returns
    ----------
    result          : torch.tensor
                      Result of the forward operation
    """
    B, C = x.shape
    x = x.view(B, C, 1)
    results = [x]
    for i in range(1, self.L + 1):
        freq = (2 ** i) * torch.pi
        cos_x = torch.cos(freq * x)
        sin_x = torch.sin(freq * x)
        results.append(cos_x)
        results.append(sin_x)
    results = torch.cat(results, dim=2)
    results = results.permute(0, 2, 1)
    results = results.reshape(B, -1)
    return results
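
A minimal usage sketch, assuming positional_encoder is importable from odak.learn.models.components; each of the C input dimensions gains a cos/sin pair per level, giving C * (2 * L + 1) output features:

import torch
from odak.learn.models.components import positional_encoder

encoder = positional_encoder(L = 4)
x = torch.rand(8, 2)  # e.g., a batch of normalized (x, y) coordinates
y = encoder(x)
print(y.shape)  # torch.Size([8, 18]), since 2 * (2 * 4 + 1) = 18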

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 1,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int, optional
                          Number of input channels.
        output_channels : int, optional
                          Number of middle channels.
        kernel_size     : int, optional
                          Kernel size.
        bias            : bool, optional
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn, optional
                          Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution0 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.convolution1 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.final_layer = torch.nn.Sequential(
                                               self.activation,
                                               torch.nn.Conv2d(
                                                               output_channels,
                                                               output_channels,
                                                               kernel_size = kernel_size,
                                                               padding = kernel_size // 2,
                                                               bias = bias
                                                              )
                                              )


    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        y0 = self.convolution0(x0)
        y1 = self.convolution1(x1)
        y2 = torch.add(y0, y1)
        result = self.final_layer(y2) * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • activation
              Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 1,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int, optional
                      Number of input channels.
    output_channels : int, optional
                      Number of middle channels.
    kernel_size     : int, optional
                      Kernel size.
    bias            : bool, optional
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn, optional
                      Nonlinear activation layer to be used. Defaults to torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution0 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.convolution1 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.final_layer = torch.nn.Sequential(
                                           self.activation,
                                           torch.nn.Conv2d(
                                                           output_channels,
                                                           output_channels,
                                                           kernel_size = kernel_size,
                                                           padding = kernel_size // 2,
                                                           bias = bias
                                                          )
                                          )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    y0 = self.convolution0(x0)
    y1 = self.convolution1(x1)
    y2 = torch.add(y0, y1)
    result = self.final_layer(y2) * x0
    return result
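
A minimal usage sketch follows; the class name residual_attention_layer and the exact import path are assumptions inferred from the source file cited above, so verify them against the module before use:

import torch
from odak.learn.models.components import residual_attention_layer  # assumed name and path

layer = residual_attention_layer(input_channels = 8, output_channels = 8, kernel_size = 1)
x0 = torch.randn(1, 8, 32, 32)  # features to be gated
x1 = torch.randn(1, 8, 32, 32)  # features driving the attention
result = layer(x0, x1)          # x0 scaled by a learned attention map
print(result.shape)             # torch.Size([1, 8, 32, 32])

Keeping output_channels equal to input_channels ensures the final elementwise product with x0 broadcasts correctly.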

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = 16,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A residual layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution = double_convolution(
                                              input_channels,
                                              mid_channels = mid_channels,
                                              output_channels = input_channels,
                                              kernel_size = kernel_size,
                                              bias = bias,
                                              activation = activation
                                             )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        x0 = self.convolution(x)
        return x + x0

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A residual layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = 16,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A residual layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution = double_convolution(
                                          input_channels,
                                          mid_channels = mid_channels,
                                          output_channels = input_channels,
                                          kernel_size = kernel_size,
                                          bias = bias,
                                          activation = activation
                                         )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    x0 = self.convolution(x)
    return x + x0
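
Since the internal double convolution maps back to input_channels, the skip connection adds tensors of identical shape. A minimal sketch, with the import path assumed from the source file above:

import torch
from odak.learn.models.components import residual_layer

layer = residual_layer(input_channels = 4, mid_channels = 16, kernel_size = 3)
x = torch.randn(1, 4, 64, 64)
result = layer(x)               # x + double_convolution(x)
assert result.shape == x.shape  # residual layers preserve shape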

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """
    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        kernel_size = 7
        self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())


    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        max_pool = torch.max(x, 1)[0].unsqueeze(1)
        avg_pool = torch.mean(x, 1).unsqueeze(1)
        output = torch.cat((max_pool, avg_pool), dim=1)
        return output


    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        x_compress = self.channel_pool(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)
        scaled_x = x * scale
        return scaled_x

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    kernel_size = 7
    self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor.
    """
    max_pool = torch.max(x, 1)[0].unsqueeze(1)
    avg_pool = torch.mean(x, 1).unsqueeze(1)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    x_compress = self.channel_pool(x)
    x_out = self.spatial(x_compress)
    scale = torch.sigmoid(x_out)
    scaled_x = x * scale
    return scaled_x
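
channel_pool compresses the channel dimension to two maps (per-pixel maximum and mean), the 7 x 7 convolution reduces these to a single map, and the sigmoid turns that map into a spatial scale in (0, 1) shared by all channels. A minimal sketch, assuming the components module is importable as shown:

import torch
from odak.learn.models.components import spatial_gate

gate = spatial_gate()
x = torch.randn(1, 16, 32, 32)
scaled_x = gate(x)              # every channel shares the same spatial mask
assert scaled_x.shape == x.shape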

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Resize weight to match the input channels and kernel size
        si_kernel = self.weight.reshape(
                                        self.output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply spatially varying kernels
        sv_feature = input_feature * sv_kernel

        # Perform matrix multiplication
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )
        return sa_output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Resize weight to match the input channels and kernel size
    si_kernel = self.weight.reshape(
                                    self.output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply spatially varying kernels
    sv_feature = input_feature * sv_kernel

    # Perform matrix multiplication
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )
    return sa_output
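
A minimal shape sketch, with the import path assumed from the source file cited above. The forward pass is written for a batch size of one, and sv_kernel_feature must provide input_channels * kernel_size * kernel_size values per output pixel:

import torch
from odak.learn.models.components import spatially_adaptive_convolution

layer = spatially_adaptive_convolution(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 32, 32)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 32, 32)  # one weight per input channel and kernel tap
sa_output = layer(x, sv_kernel_feature)
print(sa_output.shape)          # torch.Size([1, 2, 32, 32]) with stride = 1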

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.weight_output_channels = self.output_channels - 1
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.weight_output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Apply sv_kernel to the input_feature
        sv_feature = input_feature * sv_kernel

        # Original spatially varying convolution output
        sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                            1,
                                                            1,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

        # Reshape weight for spatially adaptive convolution
        si_kernel = self.weight.reshape(
                                        self.weight_output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply si_kernel on sv convolution output
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.weight_output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )

        # Combine the outputs and apply activation function
        output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.weight_output_channels = self.output_channels - 1
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.weight_output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Apply sv_kernel to the input_feature
    sv_feature = input_feature * sv_kernel

    # Original spatially varying convolution output
    sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                        1,
                                                        1,
                                                        (x.size(-2) // self.stride),
                                                        (x.size(-1) // self.stride)
                                                       )

    # Reshape weight for spatially adaptive convolution
    si_kernel = self.weight.reshape(
                                    self.weight_output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply si_kernel on sv convolution output
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.weight_output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

    # Combine the outputs and apply activation function
    output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
    return output
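
A minimal shape sketch (import path assumed as below). Note that the output combines one spatially varying channel with output_channels - 1 spatially adaptive channels, so output_channels should be at least two:

import torch
from odak.learn.models.components import spatially_adaptive_module

module = spatially_adaptive_module(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 32, 32)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 32, 32)
output = module(x, sv_kernel_feature)
print(output.shape)             # torch.Size([1, 2, 32, 32])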

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 2,
                 stride = 2,
                 bias = False,
                ):
        """
        An upsampling component using a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
                                           in_channels = input_channels,
                                           out_channels = output_channels,
                                           bias = bias,
                                           kernel_size = kernel_size,
                                           stride = stride
                                          )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        result = x1 + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component using a transposed convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • stride
              Stride of the transposed convolution.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 2,
             stride = 2,
             bias = False,
            ):
    """
    An upsampling component using a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let convolutional layers have bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
                                       in_channels = input_channels,
                                       out_channels = output_channels,
                                       bias = bias,
                                       kernel_size = kernel_size,
                                       stride = stride
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    result = x1 + x2
    return result
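
x1 is upsampled by the transposed convolution, padded to x2's spatial size, and added to x2, so output_channels must match x2's channel count. A minimal sketch, assuming the import path below:

import torch
from odak.learn.models.components import upsample_convtranspose2d_layer

up = upsample_convtranspose2d_layer(input_channels = 8, output_channels = 4)
x1 = torch.randn(1, 8, 16, 16)  # low-resolution features
x2 = torch.randn(1, 4, 32, 32)  # skip connection at the target resolution
result = up(x1, x2)
print(result.shape)             # torch.Size([1, 4, 32, 32])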

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU(),
                 bilinear = True
                ):
        """
        An upsampling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
            self.conv = double_convolution(
                                           input_channels = input_channels + output_channels,
                                           mid_channels = input_channels // 2,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )
        else:
            self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
            self.conv = double_convolution(
                                           input_channels = input_channels,
                                           mid_channels = output_channels,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Result of the forward operation
        """ 
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim = 1)
        result = self.conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU(), bilinear=True)

An upsampling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
  • bilinear
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU(),
             bilinear = True
            ):
    """
    An upsampling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
        self.conv = double_convolution(
                                       input_channels = input_channels + output_channels,
                                       mid_channels = input_channels // 2,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )
    else:
        self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
        self.conv = double_convolution(
                                       input_channels = input_channels,
                                       mid_channels = output_channels,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    ----------
    result        : torch.tensor
                    Result of the forward operation
    """ 
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    x = torch.cat([x2, x1], dim = 1)
    result = self.conv(x)
    return result
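
In the bilinear configuration, x1 is upsampled, padded to x2's spatial size, concatenated with x2 along the channel dimension, and passed through a double convolution. A minimal sketch (import path assumed from the source file referenced above):

import torch
from odak.learn.models.components import upsample_layer

up = upsample_layer(input_channels = 16, output_channels = 8, bilinear = True)
x1 = torch.randn(1, 16, 16, 16) # decoder features
x2 = torch.randn(1, 8, 32, 32)  # encoder skip connection
result = up(x1, x2)
print(result.shape)             # torch.Size([1, 8, 32, 32])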

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x
           Input data.
    
  • multiplier
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier = 1.):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    result = torch.exp(- (multiplier * x) ** 2)
    return result
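
A quick numeric check, assuming the function imports as below; since gaussian(x) = exp(-(multiplier * x)^2), a larger multiplier narrows the bell:

import torch
from odak.learn.models.components import gaussian

x = torch.tensor([-1.0, 0.0, 1.0])
print(gaussian(x))                   # tensor([0.3679, 1.0000, 0.3679])
print(gaussian(x, multiplier = 2.))  # tensor([0.0183, 1.0000, 0.0183])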

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    -----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    out = x * torch.sigmoid(x)
    return out
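
A quick numeric check, assuming the same import path; swish(x) = x * sigmoid(x), so it is near-linear for large positive inputs and gently suppresses negative ones:

import torch
from odak.learn.models.components import swish

x = torch.tensor([-1.0, 0.0, 1.0])
print(swish(x))  # tensor([-0.2689, 0.0000, 0.7311])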

channel_gate

Bases: Module

Channel attention module with various pooling strategies. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class channel_gate(torch.nn.Module):
    """
    Channel attention module with various pooling strategies.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """
    def __init__(
                 self, 
                 gate_channels, 
                 reduction_ratio = 16, 
                 pool_types = ['avg', 'max']
                ):
        """
        Initializes the channel gate module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the intermediate layer.
        pool_types      : list
                          List of pooling operations to apply.
        """
        super().__init__()
        self.gate_channels = gate_channels
        hidden_channels = gate_channels // reduction_ratio
        if hidden_channels == 0:
            hidden_channels = 1
        self.mlp = torch.nn.Sequential(
                                       convolutional_block_attention.Flatten(),
                                       torch.nn.Linear(gate_channels, hidden_channels),
                                       torch.nn.ReLU(),
                                       torch.nn.Linear(hidden_channels, gate_channels)
                                      )
        self.pool_types = pool_types


    def forward(self, x):
        """
        Forward pass of the ChannelGate module.

        Applies channel-wise attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the ChannelGate module.

        Returns
        -------
        output       : torch.tensor
                       Output tensor after applying channel attention.
        """
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                pool = torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
            elif pool_type == 'max':
                pool = torch.nn.functional.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
            channel_att_raw = self.mlp(pool)
            channel_att_sum = channel_att_raw if channel_att_sum is None else channel_att_sum + channel_att_raw
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        output = x * scale
        return output

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'])

Initializes the channel gate module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the intermediate layer.
    
  • pool_types
              List of pooling operations to apply.
    
Source code in odak/learn/models/components.py
def __init__(
             self, 
             gate_channels, 
             reduction_ratio = 16, 
             pool_types = ['avg', 'max']
            ):
    """
    Initializes the channel gate module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the intermediate layer.
    pool_types      : list
                      List of pooling operations to apply.
    """
    super().__init__()
    self.gate_channels = gate_channels
    hidden_channels = gate_channels // reduction_ratio
    if hidden_channels == 0:
        hidden_channels = 1
    self.mlp = torch.nn.Sequential(
                                   convolutional_block_attention.Flatten(),
                                   torch.nn.Linear(gate_channels, hidden_channels),
                                   torch.nn.ReLU(),
                                   torch.nn.Linear(hidden_channels, gate_channels)
                                  )
    self.pool_types = pool_types

forward(x)

Forward pass of the ChannelGate module.

Applies channel-wise attention to the input tensor.

Parameters:

  • x
           Input tensor to the ChannelGate module.
    

Returns:

  • output ( tensor ) –

    Output tensor after applying channel attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the ChannelGate module.

    Applies channel-wise attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the ChannelGate module.

    Returns
    -------
    output       : torch.tensor
                   Output tensor after applying channel attention.
    """
    channel_att_sum = None
    for pool_type in self.pool_types:
        if pool_type == 'avg':
            pool = torch.nn.functional.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        elif pool_type == 'max':
            pool = torch.nn.functional.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
        channel_att_raw = self.mlp(pool)
        channel_att_sum = channel_att_raw if channel_att_sum is None else channel_att_sum + channel_att_raw
    scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
    output = x * scale
    return output
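
A minimal sketch, with the import path assumed as below; the gate rescales each channel by a value in (0, 1) while preserving the input shape:

import torch
from odak.learn.models.components import channel_gate

gate = channel_gate(gate_channels = 32, reduction_ratio = 16)
x = torch.randn(4, 32, 16, 16)
output = gate(x)
assert output.shape == x.shape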

convolution_layer

Bases: Module

A convolution layer.

Source code in odak/learn/models/components.py
class convolution_layer(torch.nn.Module):
    """
    A convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 bias = False,
                 stride = 1,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        A convolutional layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        layers = [
            torch.nn.Conv2d(
                            input_channels,
                            output_channels,
                            kernel_size = kernel_size,
                            stride = stride,
                            padding = kernel_size // 2,
                            bias = bias
                           )
        ]
        if normalization:
            layers.append(torch.nn.BatchNorm2d(output_channels))
        if activation:
            layers.append(activation)
        self.model = torch.nn.Sequential(*layers)


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        result = self.model(x)
        return result

__init__(input_channels=2, output_channels=2, kernel_size=3, bias=False, stride=1, normalization=True, activation=torch.nn.ReLU())

A convolutional layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             bias = False,
             stride = 1,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    A convolutional layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    stride          : int
                      Stride of the convolution.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    layers = [
        torch.nn.Conv2d(
                        input_channels,
                        output_channels,
                        kernel_size = kernel_size,
                        stride = stride,
                        padding = kernel_size // 2,
                        bias = bias
                       )
    ]
    if normalization:
        layers.append(torch.nn.BatchNorm2d(output_channels))
    if activation:
        layers.append(activation)
    self.model = torch.nn.Sequential(*layers)

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    result = self.model(x)
    return result
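
A minimal usage sketch, assuming convolution_layer is importable from odak.learn.models; the channel counts and input shape are illustrative:

import torch
from odak.learn.models import convolution_layer # import path assumed from this page

layer = convolution_layer(input_channels = 3, output_channels = 16, kernel_size = 3)
x = torch.randn(1, 3, 32, 32)
y = layer(x)                               # padding = kernel_size // 2 preserves height and width
print(y.shape)                             # torch.Size([1, 16, 32, 32])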

convolutional_block_attention

Bases: Module

Convolutional Block Attention Module (CBAM) class. This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).

Source code in odak/learn/models/components.py
class convolutional_block_attention(torch.nn.Module):
    """
    Convolutional Block Attention Module (CBAM) class. 
    This class is heavily inspired by https://github.com/Jongchan/attention-module/commit/e4ee180f1335c09db14d39a65d97c8ca3d1f7b16 (MIT License).
    """
    def __init__(
                 self, 
                 gate_channels, 
                 reduction_ratio = 16, 
                 pool_types = ['avg', 'max'], 
                 no_spatial = False
                ):
        """
        Initializes the convolutional block attention module.

        Parameters
        ----------
        gate_channels   : int
                          Number of channels of the input feature map.
        reduction_ratio : int
                          Reduction ratio for the channel attention.
        pool_types      : list
                          List of pooling operations to apply for channel attention.
        no_spatial      : bool
                          If True, spatial attention is not applied.
        """
        super(convolutional_block_attention, self).__init__()
        self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.spatial_gate = spatial_gate()


    class Flatten(torch.nn.Module):
        """
        Flattens the input tensor to a 2D matrix.
        """
        def forward(self, x):
            return x.view(x.size(0), -1)


    def forward(self, x):
        """
        Forward pass of the convolutional block attention module.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the CBAM module.

        Returns
        -------
        x_out        : torch.tensor
                       Output tensor after applying channel and spatial attention.
        """
        x_out = self.channel_gate(x)
        if not self.no_spatial:
            x_out = self.spatial_gate(x_out)
        return x_out

Flatten

Bases: Module

Flattens the input tensor to a 2D matrix.

Source code in odak/learn/models/components.py
class Flatten(torch.nn.Module):
    """
    Flattens the input tensor to a 2D matrix.
    """
    def forward(self, x):
        return x.view(x.size(0), -1)

__init__(gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False)

Initializes the convolutional block attention module.

Parameters:

  • gate_channels
              Number of channels of the input feature map.
    
  • reduction_ratio (int, default: 16 ) –
              Reduction ratio for the channel attention.
    
  • pool_types
              List of pooling operations to apply for channel attention.
    
  • no_spatial
              If True, spatial attention is not applied.
    
Source code in odak/learn/models/components.py
def __init__(
             self, 
             gate_channels, 
             reduction_ratio = 16, 
             pool_types = ['avg', 'max'], 
             no_spatial = False
            ):
    """
    Initializes the convolutional block attention module.

    Parameters
    ----------
    gate_channels   : int
                      Number of channels of the input feature map.
    reduction_ratio : int
                      Reduction ratio for the channel attention.
    pool_types      : list
                      List of pooling operations to apply for channel attention.
    no_spatial      : bool
                      If True, spatial attention is not applied.
    """
    super(convolutional_block_attention, self).__init__()
    self.channel_gate = channel_gate(gate_channels, reduction_ratio, pool_types)
    self.no_spatial = no_spatial
    if not no_spatial:
        self.spatial_gate = spatial_gate()

forward(x)

Forward pass of the convolutional block attention module.

Parameters:

  • x
           Input tensor to the CBAM module.
    

Returns:

  • x_out ( tensor ) –

    Output tensor after applying channel and spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the convolutional block attention module.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the CBAM module.

    Returns
    -------
    x_out        : torch.tensor
                   Output tensor after applying channel and spatial attention.
    """
    x_out = self.channel_gate(x)
    if not self.no_spatial:
        x_out = self.spatial_gate(x_out)
    return x_out
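
A minimal usage sketch, assuming convolutional_block_attention is importable from odak.learn.models; shapes are illustrative:

import torch
from odak.learn.models import convolutional_block_attention # import path assumed from this page

cbam = convolutional_block_attention(gate_channels = 16, reduction_ratio = 4)
x = torch.randn(2, 16, 32, 32)
y = cbam(x)                                # channel attention, then spatial attention
print(y.shape)                             # torch.Size([2, 16, 32, 32])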

double_convolution

Bases: Module

A double convolution layer.

Source code in odak/learn/models/components.py
class double_convolution(torch.nn.Module):
    """
    A double convolution layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = None,
                 output_channels = 2,
                 kernel_size = 3, 
                 bias = False,
                 normalization = True,
                 activation = torch.nn.ReLU()
                ):
        """
        Double convolution model.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of channels in the hidden layer between two convolutions.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        if isinstance(mid_channels, type(None)):
            mid_channels = output_channels
        self.activation = activation
        self.model = torch.nn.Sequential(
                                         convolution_layer(
                                                           input_channels = input_channels,
                                                           output_channels = mid_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          ),
                                         convolution_layer(
                                                           input_channels = mid_channels,
                                                           output_channels = output_channels,
                                                           kernel_size = kernel_size,
                                                           bias = bias,
                                                           normalization = normalization,
                                                           activation = self.activation
                                                          )
                                        )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.model(x)
        return result

__init__(input_channels=2, mid_channels=None, output_channels=2, kernel_size=3, bias=False, normalization=True, activation=torch.nn.ReLU())

Double convolution model.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of channels in the hidden layer between two convolutions.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = None,
             output_channels = 2,
             kernel_size = 3, 
             bias = False,
             normalization = True,
             activation = torch.nn.ReLU()
            ):
    """
    Double convolution model.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of channels in the hidden layer between two convolutions.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    if isinstance(mid_channels, type(None)):
        mid_channels = output_channels
    self.activation = activation
    self.model = torch.nn.Sequential(
                                     convolution_layer(
                                                       input_channels = input_channels,
                                                       output_channels = mid_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      ),
                                     convolution_layer(
                                                       input_channels = mid_channels,
                                                       output_channels = output_channels,
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       normalization = normalization,
                                                       activation = self.activation
                                                      )
                                    )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.model(x)
    return result
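
A minimal usage sketch, assuming double_convolution is importable from odak.learn.models; channel counts are illustrative:

import torch
from odak.learn.models import double_convolution # import path assumed from this page

block = double_convolution(input_channels = 3, mid_channels = 8, output_channels = 16)
x = torch.randn(1, 3, 64, 64)
print(block(x).shape)                      # torch.Size([1, 16, 64, 64])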

downsample_layer

Bases: Module

A downscaling component followed by a double convolution.

Source code in odak/learn/models/components.py
class downsample_layer(torch.nn.Module):
    """
    A downscaling component followed by a double convolution.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A downscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.maxpool_conv = torch.nn.Sequential(
                                                torch.nn.MaxPool2d(2),
                                                double_convolution(
                                                                   input_channels = input_channels,
                                                                   mid_channels = output_channels,
                                                                   output_channels = output_channels,
                                                                   kernel_size = kernel_size,
                                                                   bias = bias,
                                                                   activation = activation
                                                                  )
                                               )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x              : torch.tensor
                         Input data.



        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        result = self.maxpool_conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A downscaling component with a double convolution.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A downscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.maxpool_conv = torch.nn.Sequential(
                                            torch.nn.MaxPool2d(2),
                                            double_convolution(
                                                               input_channels = input_channels,
                                                               mid_channels = output_channels,
                                                               output_channels = output_channels,
                                                               kernel_size = kernel_size,
                                                               bias = bias,
                                                               activation = activation
                                                              )
                                           )

forward(x)

Forward model.

Parameters:

  • x
             Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x              : torch.tensor
                     Input data.



    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    result = self.maxpool_conv(x)
    return result
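
A minimal usage sketch, assuming downsample_layer is importable from odak.learn.models:

import torch
from odak.learn.models import downsample_layer # import path assumed from this page

down = downsample_layer(input_channels = 16, output_channels = 32)
x = torch.randn(1, 16, 64, 64)
print(down(x).shape)                       # torch.Size([1, 32, 32, 32]); MaxPool2d(2) halves height and width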

global_feature_module

Bases: Module

A global feature layer that processes global features from input channels and applies them to another input tensor via learned transformations.

Source code in odak/learn/models/components.py
class global_feature_module(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies them to another input tensor via learned transformations.
    """
    def __init__(
                 self,
                 input_channels,
                 mid_channels,
                 output_channels,
                 kernel_size,
                 bias = False,
                 normalization = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of mid channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers have a bias term.
        normalization   : bool
                          If True, adds a Batch Normalization layer after the convolutional layer.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.transformations_1 = global_transformations(input_channels, output_channels)
        self.global_features_1 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.global_features_2 = double_convolution(
                                                    input_channels = input_channels,
                                                    mid_channels = mid_channels,
                                                    output_channels = output_channels,
                                                    kernel_size = kernel_size,
                                                    bias = bias,
                                                    normalization = normalization,
                                                    activation = activation
                                                   )
        self.transformations_2 = global_transformations(input_channels, output_channels)


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        global_tensor_1 = self.transformations_1(x1, x2)
        y1 = self.global_features_1(global_tensor_1)
        y2 = self.global_features_2(y1)
        global_tensor_2 = self.transformations_2(y1, y2)
        return global_tensor_2

__init__(input_channels, mid_channels, output_channels, kernel_size, bias=False, normalization=False, activation=torch.nn.ReLU())

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of mid channels.
    
  • output_channels (int) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have a bias term.
    
  • normalization
              If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             mid_channels,
             output_channels,
             kernel_size,
             bias = False,
             normalization = False,
             activation = torch.nn.ReLU()
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of mid channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers have a bias term.
    normalization   : bool
                      If True, adds a Batch Normalization layer after the convolutional layer.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.transformations_1 = global_transformations(input_channels, output_channels)
    self.global_features_1 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.global_features_2 = double_convolution(
                                                input_channels = input_channels,
                                                mid_channels = mid_channels,
                                                output_channels = output_channels,
                                                kernel_size = kernel_size,
                                                bias = bias,
                                                normalization = normalization,
                                                activation = activation
                                               )
    self.transformations_2 = global_transformations(input_channels, output_channels)

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    global_tensor_1 = self.transformations_1(x1, x2)
    y1 = self.global_features_1(global_tensor_1)
    y2 = self.global_features_2(y1)
    global_tensor_2 = self.transformations_2(y1, y2)
    return global_tensor_2
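
A minimal usage sketch, assuming global_feature_module is importable from odak.learn.models. The channel counts are kept equal here so that the internal chain of global transformations and double convolutions is shape-consistent; that choice is an assumption of this sketch, not a documented requirement:

import torch
from odak.learn.models import global_feature_module # import path assumed from this page

module = global_feature_module(
                               input_channels = 8,
                               mid_channels = 8,
                               output_channels = 8,
                               kernel_size = 3
                              )
x1 = torch.randn(1, 8, 32, 32)
x2 = torch.randn(1, 8, 32, 32)
print(module(x1, x2).shape)                # torch.Size([1, 8, 32, 32])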

global_transformations

Bases: Module

A global feature layer that processes global features from input channels and applies learned transformations to another input tensor.

This implementation is adapted from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Reference: J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/components.py
class global_transformations(torch.nn.Module):
    """
    A global feature layer that processes global features from input channels and
    applies learned transformations to another input tensor.

    This implementation is adapted from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Reference:
    J. Huang, P. Zhu, M. Geng et al. "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels
                ):
        """
        A global feature layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        """
        super().__init__()
        self.global_feature_1 = torch.nn.Sequential(
            torch.nn.Linear(input_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True),
        )
        self.global_feature_2 = torch.nn.Sequential(
            torch.nn.Linear(output_channels, output_channels),
            torch.nn.LeakyReLU(0.2, inplace = True)
        )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.

        Returns
        ----------
        result        : torch.tensor
                        Estimated output.
        """
        y = torch.mean(x2, dim = (2, 3))
        y1 = self.global_feature_1(y)
        y2 = self.global_feature_2(y1)
        y1 = y1.unsqueeze(2).unsqueeze(3)
        y2 = y2.unsqueeze(2).unsqueeze(3)
        result = x1 * y1 + y2
        return result

__init__(input_channels, output_channels)

A global feature layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int) –
              Number of output channels.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels
            ):
    """
    A global feature layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    """
    super().__init__()
    self.global_feature_1 = torch.nn.Sequential(
        torch.nn.Linear(input_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True),
    )
    self.global_feature_2 = torch.nn.Sequential(
        torch.nn.Linear(output_channels, output_channels),
        torch.nn.LeakyReLU(0.2, inplace = True)
    )

forward(x1, x2)

Forward model.

Parameters:

  • x1
             First input data.
    
  • x2
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.

    Returns
    ----------
    result        : torch.tensor
                    Estimated output.
    """
    y = torch.mean(x2, dim = (2, 3))
    y1 = self.global_feature_1(y)
    y2 = self.global_feature_2(y1)
    y1 = y1.unsqueeze(2).unsqueeze(3)
    y2 = y2.unsqueeze(2).unsqueeze(3)
    result = x1 * y1 + y2
    return result
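
In the forward pass above, x2 is average-pooled into a per-channel global descriptor that then scales and shifts x1. A minimal sketch, assuming global_transformations is importable from odak.learn.models and using matching channel counts:

import torch
from odak.learn.models import global_transformations # import path assumed from this page

transform = global_transformations(input_channels = 8, output_channels = 8)
x1 = torch.randn(1, 8, 32, 32)             # features to be scaled and shifted
x2 = torch.randn(1, 8, 32, 32)             # pooled into the global descriptor
print(transform(x1, x2).shape)             # torch.Size([1, 8, 32, 32])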

multi_layer_perceptron

Bases: Module

A multi-layer perceptron model.

Source code in odak/learn/models/models.py
class multi_layer_perceptron(torch.nn.Module):
    """
    A multi-layer perceptron model.
    """

    def __init__(self,
                 dimensions,
                 activation = torch.nn.ReLU(),
                 bias = False,
                 model_type = 'conventional',
                 siren_multiplier = 1.,
                 input_multiplier = None
                ):
        """
        Parameters
        ----------
        dimensions        : list
                            List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel.).
        activation        : torch.nn
                            Nonlinear activation function.
                            Default is `torch.nn.ReLU()`.
        bias              : bool
                            If set to True, linear layers will include biases.
        siren_multiplier  : float
                            When using `SIREN` model type, this parameter functions as a hyperparameter.
                            The original SIREN work uses 30.
                            You can bypass this parameter by providing inputs that are not normalized and larger than one.
        input_multiplier  : float
                            Initial value of the input multiplier before the very first layer.
        model_type        : str
                            Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                            `conventional` refers to a standard multi-layer perceptron.
                            For `SIREN,` see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
                            For `Swish,` see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). 
                            For `FILM SIREN,` see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
                            For `Gaussian,` see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
        """
        super(multi_layer_perceptron, self).__init__()
        self.activation = activation
        self.bias = bias
        self.model_type = model_type
        self.layers = torch.nn.ModuleList()
        self.siren_multiplier = siren_multiplier
        self.dimensions = dimensions
        for i in range(len(self.dimensions) - 1):
            self.layers.append(torch.nn.Linear(self.dimensions[i], self.dimensions[i + 1], bias = self.bias))
        if not isinstance(input_multiplier, type(None)):
            self.input_multiplier = torch.nn.ParameterList()
            self.input_multiplier.append(torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier))
        if self.model_type == 'FILM SIREN':
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1:-1]:
                self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
        if self.model_type == 'Gaussian':
            self.alpha = torch.nn.ParameterList()
            for j in self.dimensions[1:-1]:
                self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        if hasattr(self, 'input_multiplier'):
            result = x * self.input_multiplier[0]
        else:
            result = x
        for layer_id, layer in enumerate(self.layers[:-1]):
            result = layer(result)
            if self.model_type == 'conventional':
                result = self.activation(result)
            elif self.model_type == 'swish':
                result = swish(result)
            elif self.model_type == 'SIREN':
                result = torch.sin(result * self.siren_multiplier)
            elif self.model_type == 'FILM SIREN':
                result = torch.sin(self.alpha[layer_id][0] * result + self.alpha[layer_id][1])
            elif self.model_type == 'Gaussian': 
                result = gaussian(result, self.alpha[layer_id][0])
        result = self.layers[-1](result)
        return result

__init__(dimensions, activation=torch.nn.ReLU(), bias=False, model_type='conventional', siren_multiplier=1.0, input_multiplier=None)

Parameters:

  • dimensions
                List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel.).
    
  • activation
                Nonlinear activation function.
                Default is `torch.nn.ReLU()`.
    
  • bias
                If set to True, linear layers will include biases.
    
  • siren_multiplier
                When using `SIREN` model type, this parameter functions as a hyperparameter.
                The original SIREN work uses 30.
                You can bypass this parameter by providing inputs that are not normalized and larger than one.
    
  • input_multiplier
                Initial value of the input multiplier before the very first layer.
    
  • model_type
                Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                `conventional` refers to a standard multi-layer perceptron.
                For `SIREN,` see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
                For `Swish,` see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). 
                For `FILM SIREN,` see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
                For `Gaussian,` see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
    
Source code in odak/learn/models/models.py
def __init__(self,
             dimensions,
             activation = torch.nn.ReLU(),
             bias = False,
             model_type = 'conventional',
             siren_multiplier = 1.,
             input_multiplier = None
            ):
    """
    Parameters
    ----------
    dimensions        : list
                        List of integers representing the dimensions of each layer (e.g., [2, 10, 1], where the first layer has two channels and last one has one channel.).
    activation        : torch.nn
                        Nonlinear activation function.
                        Default is `torch.nn.ReLU()`.
    bias              : bool
                        If set to True, linear layers will include biases.
    siren_multiplier  : float
                        When using `SIREN` model type, this parameter functions as a hyperparameter.
                        The original SIREN work uses 30.
                        You can bypass this parameter by providing inputs that are not normalized and larger than one.
    input_multiplier  : float
                        Initial value of the input multiplier before the very first layer.
    model_type        : str
                        Model type: `conventional`, `swish`, `SIREN`, `FILM SIREN`, `Gaussian`.
                        `conventional` refers to a standard multi-layer perceptron.
                        For `SIREN,` see: Sitzmann, Vincent, et al. "Implicit neural representations with periodic activation functions." Advances in neural information processing systems 33 (2020): 7462-7473.
                        For `Swish,` see: Ramachandran, Prajit, Barret Zoph, and Quoc V. Le. "Searching for activation functions." arXiv preprint arXiv:1710.05941 (2017). 
                        For `FILM SIREN,` see: Chan, Eric R., et al. "pi-gan: Periodic implicit generative adversarial networks for 3d-aware image synthesis." Proceedings of the IEEE/CVF conference on computer vision and pattern recognition. 2021.
                        For `Gaussian,` see: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.
    """
    super(multi_layer_perceptron, self).__init__()
    self.activation = activation
    self.bias = bias
    self.model_type = model_type
    self.layers = torch.nn.ModuleList()
    self.siren_multiplier = siren_multiplier
    self.dimensions = dimensions
    for i in range(len(self.dimensions) - 1):
        self.layers.append(torch.nn.Linear(self.dimensions[i], self.dimensions[i + 1], bias = self.bias))
    if not isinstance(input_multiplier, type(None)):
        self.input_multiplier = torch.nn.ParameterList()
        self.input_multiplier.append(torch.nn.Parameter(torch.ones(1, self.dimensions[0]) * input_multiplier))
    if self.model_type == 'FILM SIREN':
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1:-1]:
            self.alpha.append(torch.nn.Parameter(torch.randn(2, 1, j)))
    if self.model_type == 'Gaussian':
        self.alpha = torch.nn.ParameterList()
        for j in self.dimensions[1:-1]:
            self.alpha.append(torch.nn.Parameter(torch.randn(1, 1, j)))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    if hasattr(self, 'input_multiplier'):
        result = x * self.input_multiplier[0]
    else:
        result = x
    for layer_id, layer in enumerate(self.layers[:-1]):
        result = layer(result)
        if self.model_type == 'conventional':
            result = self.activation(result)
        elif self.model_type == 'swish':
            result = swish(result)
        elif self.model_type == 'SIREN':
            result = torch.sin(result * self.siren_multiplier)
        elif self.model_type == 'FILM SIREN':
            result = torch.sin(self.alpha[layer_id][0] * result + self.alpha[layer_id][1])
        elif self.model_type == 'Gaussian': 
            result = gaussian(result, self.alpha[layer_id][0])
    result = self.layers[-1](result)
    return result
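
A minimal sketch, assuming multi_layer_perceptron is importable from odak.learn.models; the layer dimensions are illustrative, and the multiplier of 30 follows the original SIREN work cited above:

import torch
from odak.learn.models import multi_layer_perceptron # import path assumed from this page

model = multi_layer_perceptron(
                               dimensions = [2, 64, 64, 1],
                               model_type = 'SIREN',
                               siren_multiplier = 30.
                              )
x = torch.rand(100, 2)                     # normalized 2D coordinates in [0, 1)
print(model(x).shape)                      # torch.Size([100, 1])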

non_local_layer

Bases: Module

Self-Attention Layer [z_i = W_z y_i + x_i] (non-local block; see https://arxiv.org/abs/1711.07971)

Source code in odak/learn/models/components.py
class non_local_layer(torch.nn.Module):
    """
    Self-Attention Layer [z_i = W_z y_i + x_i] (non-local block; see https://arxiv.org/abs/1711.07971)
    """
    def __init__(
                 self,
                 input_channels = 1024,
                 bottleneck_channels = 512,
                 kernel_size = 1,
                 bias = False,
                ):
        """

        Parameters
        ----------
        input_channels      : int
                              Number of input channels.
        bottleneck_channels : int
                              Number of middle channels.
        kernel_size         : int
                              Kernel size.
        bias                : bool 
                              Set to True to let convolutional layers have a bias term.
        """
        super(non_local_layer, self).__init__()
        self.input_channels = input_channels
        self.bottleneck_channels = bottleneck_channels
        self.g = torch.nn.Conv2d(
                                 self.input_channels, 
                                 self.bottleneck_channels,
                                 kernel_size = kernel_size,
                                 padding = kernel_size // 2,
                                 bias = bias
                                )
        self.W_z = torch.nn.Sequential(
                                       torch.nn.Conv2d(
                                                       self.bottleneck_channels,
                                                       self.input_channels, 
                                                       kernel_size = kernel_size,
                                                       bias = bias,
                                                       padding = kernel_size // 2
                                                      ),
                                       torch.nn.BatchNorm2d(self.input_channels)
                                      )
        torch.nn.init.constant_(self.W_z[1].weight, 0)   
        torch.nn.init.constant_(self.W_z[1].bias, 0)


    def forward(self, x):
        """
        Forward model [z_i = W_z y_i + x_i]

        Parameters
        ----------
        x               : torch.tensor
                          Input data.


        Returns
        ----------
        z               : torch.tensor
                          Estimated output.
        """
        batch_size, channels, height, width = x.size()
        theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
        phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
        g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
        attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
        attn = torch.nn.functional.softmax(attn, dim=-1)
        y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
        W_y = self.W_z(y)
        z = W_y + x
        return z

__init__(input_channels=1024, bottleneck_channels=512, kernel_size=1, bias=False)

Parameters:

  • input_channels
                  Number of input channels.
    
  • bottleneck_channels (int, default: 512 ) –
                  Number of middle channels.
    
  • kernel_size
                  Kernel size.
    
  • bias
                  Set to True to let convolutional layers have a bias term.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 1024,
             bottleneck_channels = 512,
             kernel_size = 1,
             bias = False,
            ):
    """

    Parameters
    ----------
    input_channels      : int
                          Number of input channels.
    bottleneck_channels : int
                          Number of middle channels.
    kernel_size         : int
                          Kernel size.
    bias                : bool 
                          Set to True to let convolutional layers have a bias term.
    """
    super(non_local_layer, self).__init__()
    self.input_channels = input_channels
    self.bottleneck_channels = bottleneck_channels
    self.g = torch.nn.Conv2d(
                             self.input_channels, 
                             self.bottleneck_channels,
                             kernel_size = kernel_size,
                             padding = kernel_size // 2,
                             bias = bias
                            )
    self.W_z = torch.nn.Sequential(
                                   torch.nn.Conv2d(
                                                   self.bottleneck_channels,
                                                   self.input_channels, 
                                                   kernel_size = kernel_size,
                                                   bias = bias,
                                                   padding = kernel_size // 2
                                                  ),
                                   torch.nn.BatchNorm2d(self.input_channels)
                                  )
    torch.nn.init.constant_(self.W_z[1].weight, 0)   
    torch.nn.init.constant_(self.W_z[1].bias, 0)

forward(x)

Forward model [z_i = W_z y_i + x_i]

Parameters:

  • x
              Input data.
    

Returns:

  • z ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model [z_i = W_z y_i + x_i]

    Parameters
    ----------
    x               : torch.tensor
                      Input data.


    Returns
    ----------
    z               : torch.tensor
                      Estimated output.
    """
    batch_size, channels, height, width = x.size()
    theta = x.view(batch_size, channels, -1).permute(0, 2, 1)
    phi = x.view(batch_size, channels, -1).permute(0, 2, 1)
    g = self.g(x).view(batch_size, self.bottleneck_channels, -1).permute(0, 2, 1)
    attn = torch.bmm(theta, phi.transpose(1, 2)) / (height * width)
    attn = torch.nn.functional.softmax(attn, dim=-1)
    y = torch.bmm(attn, g).permute(0, 2, 1).contiguous().view(batch_size, self.bottleneck_channels, height, width)
    W_y = self.W_z(y)
    z = W_y + x
    return z
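
A minimal usage sketch, assuming non_local_layer is importable from odak.learn.models; the channel counts are illustrative:

import torch
from odak.learn.models import non_local_layer # import path assumed from this page

block = non_local_layer(input_channels = 64, bottleneck_channels = 32)
x = torch.randn(1, 64, 16, 16)
print(block(x).shape)                      # torch.Size([1, 64, 16, 16]); the residual keeps the input shape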

normalization

Bases: Module

A normalization layer.

Source code in odak/learn/models/components.py
class normalization(torch.nn.Module):
    """
    A normalization layer.
    """
    def __init__(
                 self,
                 dim = 1,
                ):
        """
        Normalization layer.


        Parameters
        ----------
        dim             : int
                          Number of channels in the input; normalization is applied along the channel axis.
        """
        super().__init__()
        self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        eps = 1e-5 if x.dtype == torch.float32 else 1e-3
        var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
        mean = torch.mean(x, dim = 1, keepdim = True)
        result =  (x - mean) * (var + eps).rsqrt() * self.k
        return result 

__init__(dim=1)

Normalization layer.

Parameters:

  • dim
              Number of channels in the input; normalization is applied along the channel axis.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             dim = 1,
            ):
    """
    Normalization layer.


    Parameters
    ----------
    dim             : int
                      Number of channels in the input; normalization is applied along the channel axis.
    """
    super().__init__()
    self.k = torch.nn.Parameter(torch.ones(1, dim, 1, 1))

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    ----------
    result        : torch.tensor
                    Estimated output.      
    """
    eps = 1e-5 if x.dtype == torch.float32 else 1e-3
    var = torch.var(x, dim = 1, unbiased = False, keepdim = True)
    mean = torch.mean(x, dim = 1, keepdim = True)
    result =  (x - mean) * (var + eps).rsqrt() * self.k
    return result 
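
Note that dim sets the size of the learnable scale k and should match the input's channel count. A minimal sketch, assuming normalization is importable from odak.learn.models:

import torch
from odak.learn.models import normalization # import path assumed from this page

norm = normalization(dim = 8)              # dim matches the channel count of the input
x = torch.randn(2, 8, 16, 16)
y = norm(x)
print(y.mean(dim = 1).abs().max())         # per-location channel means are near zero (k initializes to ones)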

positional_encoder

Bases: Module

A positional encoder module.

Source code in odak/learn/models/components.py
class positional_encoder(torch.nn.Module):
    """
    A positional encoder module.
    """

    def __init__(self, L):
        """
        A positional encoder module.

        Parameters
        ----------
        L                   : int
                              Positional encoding level.
        """
        super(positional_encoder, self).__init__()
        self.L = L


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x               : torch.tensor
                          Input data.

        Returns
        ----------
        result          : torch.tensor
                          Result of the forward operation
        """
        B, C = x.shape
        x = x.view(B, C, 1)
        results = [x]
        for i in range(1, self.L + 1):
            freq = (2 ** i) * torch.pi
            cos_x = torch.cos(freq * x)
            sin_x = torch.sin(freq * x)
            results.append(cos_x)
            results.append(sin_x)
        results = torch.cat(results, dim=2)
        results = results.permute(0, 2, 1)
        results = results.reshape(B, -1)
        return results

__init__(L)

A positional encoder module.

Parameters:

  • L
                  Positional encoding level.
    
Source code in odak/learn/models/components.py
def __init__(self, L):
    """
    A positional encoder module.

    Parameters
    ----------
    L                   : int
                          Positional encoding level.
    """
    super(positional_encoder, self).__init__()
    self.L = L

forward(x)

Forward model.

Parameters:

  • x
              Input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x               : torch.tensor
                      Input data.

    Returns
    ----------
    result          : torch.tensor
                      Result of the forward operation
    """
    B, C = x.shape
    x = x.view(B, C, 1)
    results = [x]
    for i in range(1, self.L + 1):
        freq = (2 ** i) * torch.pi
        cos_x = torch.cos(freq * x)
        sin_x = torch.sin(freq * x)
        results.append(cos_x)
        results.append(sin_x)
    results = torch.cat(results, dim=2)
    results = results.permute(0, 2, 1)
    results = results.reshape(B, -1)
    return results
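
For an input with C channels, the output concatenates the raw input with L cosine/sine pairs per channel, giving C * (2 * L + 1) features; in this implementation the frequencies start at 2^1 * pi. A minimal sketch, assuming positional_encoder is importable from odak.learn.models:

import torch
from odak.learn.models import positional_encoder # import path assumed from this page

encoder = positional_encoder(L = 4)
x = torch.rand(10, 2)                      # ten 2D coordinates
y = encoder(x)
print(y.shape)                             # torch.Size([10, 18]) = (10, 2 * (2 * 4 + 1))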

residual_attention_layer

Bases: Module

A residual block with an attention layer.

Source code in odak/learn/models/components.py
class residual_attention_layer(torch.nn.Module):
    """
    A residual block with an attention layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 1,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        An attention layer class.


        Parameters
        ----------
        input_channels  : int, optional
                          Number of input channels.
        output_channels : int, optional
                          Number of output channels.
        kernel_size     : int, optional
                          Kernel size.
        bias            : bool, optional
                          Set to True to let convolutional layers have a bias term.
        activation      : torch.nn, optional
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution0 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.convolution1 = torch.nn.Sequential(
                                                torch.nn.Conv2d(
                                                                input_channels,
                                                                output_channels,
                                                                kernel_size = kernel_size,
                                                                padding = kernel_size // 2,
                                                                bias = bias
                                                               ),
                                                torch.nn.BatchNorm2d(output_channels)
                                               )
        self.final_layer = torch.nn.Sequential(
                                               self.activation,
                                               torch.nn.Conv2d(
                                                               output_channels,
                                                               output_channels,
                                                               kernel_size = kernel_size,
                                                               padding = kernel_size // 2,
                                                               bias = bias
                                                              )
                                              )


    def forward(self, x0, x1):
        """
        Forward model.

        Parameters
        ----------
        x0             : torch.tensor
                         First input data.

        x1             : torch.tensor
                         Second input data.


        Returns
        ----------
        result        : torch.tensor
                        Estimated output.      
        """
        y0 = self.convolution0(x0)
        y1 = self.convolution1(x1)
        y2 = torch.add(y0, y1)
        result = self.final_layer(y2) * x0
        return result

__init__(input_channels=2, output_channels=2, kernel_size=1, bias=False, activation=torch.nn.ReLU())

An attention layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, optional, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 1,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    An attention layer class.


    Parameters
    ----------
    input_channels  : int, optional
                      Number of input channels.
    output_channels : int, optional
                      Number of middle channels.
    kernel_size     : int, optional
                      Kernel size.
    bias            : bool, optional
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn, optional
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution0 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.convolution1 = torch.nn.Sequential(
                                            torch.nn.Conv2d(
                                                            input_channels,
                                                            output_channels,
                                                            kernel_size = kernel_size,
                                                            padding = kernel_size // 2,
                                                            bias = bias
                                                           ),
                                            torch.nn.BatchNorm2d(output_channels)
                                           )
    self.final_layer = torch.nn.Sequential(
                                           self.activation,
                                           torch.nn.Conv2d(
                                                           output_channels,
                                                           output_channels,
                                                           kernel_size = kernel_size,
                                                           padding = kernel_size // 2,
                                                           bias = bias
                                                          )
                                          )

forward(x0, x1)

Forward model.

Parameters:

  • x0
             First input data.
    
  • x1
             Second input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x0, x1):
    """
    Forward model.

    Parameters
    ----------
    x0             : torch.tensor
                     First input data.

    x1             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.      
    """
    y0 = self.convolution0(x0)
    y1 = self.convolution1(x1)
    y2 = torch.add(y0, y1)
    result = self.final_layer(y2) * x0
    return result
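
A minimal usage sketch for this layer. The class name falls outside this excerpt, so residual_attention_layer is assumed below, as is the import path; both inputs must share the same shape, and the output is the gated first input:

import torch
from odak.learn.models import residual_attention_layer  # class name and import path assumed

layer = residual_attention_layer(
                                 input_channels = 2,
                                 output_channels = 2,
                                 kernel_size = 1,
                                 bias = False,
                                 activation = torch.nn.ReLU()
                                )
x0 = torch.randn(1, 2, 32, 32)  # first input, also used to gate the output
x1 = torch.randn(1, 2, 32, 32)  # second input
y = layer(x0, x1)
print(y.shape)                  # torch.Size([1, 2, 32, 32])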

residual_layer

Bases: Module

A residual layer.

Source code in odak/learn/models/components.py
class residual_layer(torch.nn.Module):
    """
    A residual layer.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 mid_channels = 16,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU()
                ):
        """
        A residual layer class.


        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        mid_channels    : int
                          Number of middle channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool 
                          Set to True to let convolutional layers have bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super().__init__()
        self.activation = activation
        self.convolution = double_convolution(
                                              input_channels,
                                              mid_channels = mid_channels,
                                              output_channels = input_channels,
                                              kernel_size = kernel_size,
                                              bias = bias,
                                              activation = activation
                                             )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.      
        """
        x0 = self.convolution(x)
        return x + x0

__init__(input_channels=2, mid_channels=16, kernel_size=3, bias=False, activation=torch.nn.ReLU())

A residual layer class.

Parameters:

  • input_channels
              Number of input channels.
    
  • mid_channels
              Number of middle channels.
    
  • kernel_size
              Kernel size.
    
  • bias
              Set to True to let convolutional layers have bias term.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             mid_channels = 16,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU()
            ):
    """
    A residual layer class.


    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    mid_channels    : int
                      Number of middle channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool 
                      Set to True to let convolutional layers have bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super().__init__()
    self.activation = activation
    self.convolution = double_convolution(
                                          input_channels,
                                          mid_channels = mid_channels,
                                          output_channels = input_channels,
                                          kernel_size = kernel_size,
                                          bias = bias,
                                          activation = activation
                                         )

forward(x)

Forward model.

Parameters:

  • x
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.      
    """
    x0 = self.convolution(x)
    return x + x0
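
A short usage sketch, assuming residual_layer is importable from odak.learn.models. Because of the skip connection x + x0, the output always matches the input shape; the internal double_convolution maps back to input_channels to make the addition valid:

import torch
from odak.learn.models import residual_layer  # import path assumed

layer = residual_layer(input_channels = 2, mid_channels = 16, kernel_size = 3)
x = torch.randn(1, 2, 64, 64)
y = layer(x)                    # y = x + double_convolution(x)
print(y.shape)                  # torch.Size([1, 2, 64, 64])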

spatial_gate

Bases: Module

Spatial attention module that applies a convolution layer after channel pooling. This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.

Source code in odak/learn/models/components.py
class spatial_gate(torch.nn.Module):
    """
    Spatial attention module that applies a convolution layer after channel pooling.
    This class is heavily inspired by https://github.com/Jongchan/attention-module/blob/master/MODELS/cbam.py.
    """
    def __init__(self):
        """
        Initializes the spatial gate module.
        """
        super().__init__()
        kernel_size = 7
        self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())


    def channel_pool(self, x):
        """
        Applies max and average pooling on the channels.

        Parameters
        ----------
        x             : torch.tensor
                        Input tensor.

        Returns
        -------
        output        : torch.tensor
                        Output tensor.
        """
        max_pool = torch.max(x, 1)[0].unsqueeze(1)
        avg_pool = torch.mean(x, 1).unsqueeze(1)
        output = torch.cat((max_pool, avg_pool), dim=1)
        return output


    def forward(self, x):
        """
        Forward pass of the SpatialGate module.

        Applies spatial attention to the input tensor.

        Parameters
        ----------
        x            : torch.tensor
                       Input tensor to the SpatialGate module.

        Returns
        -------
        scaled_x     : torch.tensor
                       Output tensor after applying spatial attention.
        """
        x_compress = self.channel_pool(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)
        scaled_x = x * scale
        return scaled_x

__init__()

Initializes the spatial gate module.

Source code in odak/learn/models/components.py
def __init__(self):
    """
    Initializes the spatial gate module.
    """
    super().__init__()
    kernel_size = 7
    self.spatial = convolution_layer(2, 1, kernel_size, bias = False, activation = torch.nn.Identity())

channel_pool(x)

Applies max and average pooling on the channels.

Parameters:

  • x
            Input tensor.
    

Returns:

  • output ( tensor ) –

    Output tensor.

Source code in odak/learn/models/components.py
def channel_pool(self, x):
    """
    Applies max and average pooling on the channels.

    Parameters
    ----------
    x             : torch.tensor
                    Input tensor.

    Returns
    -------
    output        : torch.tensor
                    Output tensor.
    """
    max_pool = torch.max(x, 1)[0].unsqueeze(1)
    avg_pool = torch.mean(x, 1).unsqueeze(1)
    output = torch.cat((max_pool, avg_pool), dim=1)
    return output
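
To make the pooling concrete, the standalone snippet below repeats the same operations on a small tensor: a (N, C, H, W) input collapses to two channels holding the per-pixel maximum and mean over the original channels:

import torch

x = torch.randn(1, 8, 4, 4)                    # (N, C, H, W)
max_pool = torch.max(x, 1)[0].unsqueeze(1)     # per-pixel maximum over channels
avg_pool = torch.mean(x, 1).unsqueeze(1)       # per-pixel mean over channels
output = torch.cat((max_pool, avg_pool), dim = 1)
print(output.shape)                            # torch.Size([1, 2, 4, 4])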

forward(x)

Forward pass of the SpatialGate module.

Applies spatial attention to the input tensor.

Parameters:

  • x
           Input tensor to the SpatialGate module.
    

Returns:

  • scaled_x ( tensor ) –

    Output tensor after applying spatial attention.

Source code in odak/learn/models/components.py
def forward(self, x):
    """
    Forward pass of the SpatialGate module.

    Applies spatial attention to the input tensor.

    Parameters
    ----------
    x            : torch.tensor
                   Input tensor to the SpatialGate module.

    Returns
    -------
    scaled_x     : torch.tensor
                   Output tensor after applying spatial attention.
    """
    x_compress = self.channel_pool(x)
    x_out = self.spatial(x_compress)
    scale = torch.sigmoid(x_out)
    scaled_x = x * scale
    return scaled_x
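
A minimal usage sketch, assuming spatial_gate is importable from odak.learn.models. The module takes no constructor arguments and preserves the input shape, scaling every pixel by an attention value in (0, 1):

import torch
from odak.learn.models import spatial_gate  # import path assumed

gate = spatial_gate()
x = torch.randn(1, 16, 32, 32)
y = gate(x)                     # x weighted by a single-channel spatial attention map
print(y.shape)                  # torch.Size([1, 16, 32, 32])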

spatially_adaptive_convolution

Bases: Module

A spatially adaptive convolution layer.

References

C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."

Source code in odak/learn/models/components.py
class spatially_adaptive_convolution(torch.nn.Module):
    """
    A spatially adaptive convolution layer.

    References
    ----------

    C. Zheng et al. "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions."
    C. Xu et al. "Squeezesegv3: Spatially-adaptive Convolution for Efficient Point-Cloud Segmentation."
    C. Zheng et al. "Windowing Decomposition Convolutional Neural Network for Image Enhancement."
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive convolution layer.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn.Module
                          Activation function to apply. If None, no activation is applied.
        """
        super(spatially_adaptive_convolution, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive convolution layer.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        sa_output          : torch.tensor
                            Estimated output tensor.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Resize weight to match the input channels and kernel size
        si_kernel = self.weight.reshape(
                                        self.output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply spatially varying kernels
        sv_feature = input_feature * sv_kernel

        # Perform matrix multiplication
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )
        return sa_output
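
The unfold-based formulation above is easiest to verify in the spatially invariant special case: unfolding the input into patches and contracting with a flattened kernel reproduces an ordinary convolution. A self-contained sketch of that identity (plain PyTorch, not odak code):

import torch

x = torch.randn(1, 2, 8, 8)
weight = torch.randn(4, 2, 3, 3)                       # (out_channels, in_channels, k, k)
# Unfold the input into (1, in_channels * k * k, number_of_patches).
patches = torch.nn.functional.unfold(x, kernel_size = 3, padding = 1)
# Flatten the kernel and contract over the patch dimension.
kernel = weight.reshape(4, -1)
y = torch.matmul(kernel, patches).reshape(1, 4, 8, 8)
# Matches a standard convolution with the same weight.
y_reference = torch.nn.functional.conv2d(x, weight, padding = 1)
print(torch.allclose(y, y_reference, atol = 1e-5))     # True

The spatially adaptive layer inserts one extra step into this pipeline: each unfolded patch is first multiplied elementwise by its own per-pixel kernel (sv_kernel) before the shared weight is applied.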

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive convolution layer.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Activation function to apply. If None, no activation is applied.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive convolution layer.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn.Module
                      Activation function to apply. If None, no activation is applied.
    """
    super(spatially_adaptive_convolution, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive convolution layer.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • sa_output ( tensor ) –

    Estimated output tensor. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive convolution layer.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    sa_output          : torch.tensor
                        Estimated output tensor.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Resize weight to match the input channels and kernel size
    si_kernel = self.weight.reshape(
                                    self.output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply spatially varying kernels
    sv_feature = input_feature * sv_kernel

    # Perform matrix multiplication
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )
    return sa_output
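
A shape-level usage sketch with the default stride of 1, assuming the layer is importable from odak.learn.models. The spatially varying feature must supply input_channels * kernel_size * kernel_size values per output pixel:

import torch
from odak.learn.models import spatially_adaptive_convolution  # import path assumed

layer = spatially_adaptive_convolution(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 16, 16)                           # (1, C, H, W)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 16, 16)   # (1, C * k * k, H, W)
y = layer(x, sv_kernel_feature)
print(y.shape)                                          # torch.Size([1, 2, 16, 16])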

spatially_adaptive_module

Bases: Module

A spatially adaptive module that combines learned spatially adaptive convolutions.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/components.py
class spatially_adaptive_module(torch.nn.Module):
    """
    A spatially adaptive module that combines learned spatially adaptive convolutions.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """
    def __init__(
                 self,
                 input_channels = 2,
                 output_channels = 2,
                 kernel_size = 3,
                 stride = 1,
                 padding = 1,
                 bias = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        Initializes a spatially adaptive module.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Size of the convolution kernel.
        stride          : int
                          Stride of the convolution.
        padding         : int
                          Padding added to both sides of the input.
        bias            : bool
                          If True, includes a bias term in the convolution.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        """
        super(spatially_adaptive_module, self).__init__()
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.output_channels = output_channels
        self.stride = stride
        self.padding = padding
        self.weight_output_channels = self.output_channels - 1
        self.standard_convolution = torch.nn.Conv2d(
                                                    in_channels = input_channels,
                                                    out_channels = self.weight_output_channels,
                                                    kernel_size = kernel_size,
                                                    stride = stride,
                                                    padding = padding,
                                                    bias = bias
                                                   )
        self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
        self.activation = activation


    def forward(self, x, sv_kernel_feature):
        """
        Forward pass for the spatially adaptive module.

        Parameters
        ----------
        x                  : torch.tensor
                            Input data tensor.
                            Dimension: (1, C, H, W)
        sv_kernel_feature   : torch.tensor
                            Spatially varying kernel features.
                            Dimension: (1, C_i * kernel_size * kernel_size, H, W)

        Returns
        -------
        output             : torch.tensor
                            Combined output tensor from standard and spatially adaptive convolutions.
                            Dimension: (1, output_channels, H_out, W_out)
        """
        # Pad input and sv_kernel_feature if necessary
        if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
                -2) * self.stride != x.size(-2):
            diffY = sv_kernel_feature.size(-2) % self.stride
            diffX = sv_kernel_feature.size(-1) % self.stride
            sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                            diffY // 2, diffY - diffY // 2))
            diffY = x.size(-2) % self.stride
            diffX = x.size(-1) % self.stride
            x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                            diffY // 2, diffY - diffY // 2))

        # Unfold the input tensor for matrix multiplication
        input_feature = torch.nn.functional.unfold(
                                                   x,
                                                   kernel_size = (self.kernel_size, self.kernel_size),
                                                   stride = self.stride,
                                                   padding = self.padding
                                                  )

        # Resize sv_kernel_feature to match the input feature
        sv_kernel = sv_kernel_feature.reshape(
                                              1,
                                              self.input_channels * self.kernel_size * self.kernel_size,
                                              (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                             )

        # Apply sv_kernel to the input_feature
        sv_feature = input_feature * sv_kernel

        # Original spatially varying convolution output
        sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                            1,
                                                            1,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

        # Reshape weight for spatially adaptive convolution
        si_kernel = self.weight.reshape(
                                        self.weight_output_channels,
                                        self.input_channels * self.kernel_size * self.kernel_size
                                       )

        # Apply si_kernel on sv convolution output
        sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                                1, self.weight_output_channels,
                                                                (x.size(-2) // self.stride),
                                                                (x.size(-1) // self.stride)
                                                               )

        # Combine the outputs and apply activation function
        output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
        return output

__init__(input_channels=2, output_channels=2, kernel_size=3, stride=1, padding=1, bias=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

Initializes a spatially adaptive module.

Parameters:

  • input_channels
              Number of input channels.
    
  • output_channels (int, default: 2 ) –
              Number of output channels.
    
  • kernel_size
              Size of the convolution kernel.
    
  • stride
              Stride of the convolution.
    
  • padding
              Padding added to both sides of the input.
    
  • bias
              If True, includes a bias term in the convolution.
    
  • activation
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels = 2,
             output_channels = 2,
             kernel_size = 3,
             stride = 1,
             padding = 1,
             bias = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    Initializes a spatially adaptive module.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Size of the convolution kernel.
    stride          : int
                      Stride of the convolution.
    padding         : int
                      Padding added to both sides of the input.
    bias            : bool
                      If True, includes a bias term in the convolution.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    """
    super(spatially_adaptive_module, self).__init__()
    self.kernel_size = kernel_size
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = stride
    self.padding = padding
    self.weight_output_channels = self.output_channels - 1
    self.standard_convolution = torch.nn.Conv2d(
                                                in_channels = input_channels,
                                                out_channels = self.weight_output_channels,
                                                kernel_size = kernel_size,
                                                stride = stride,
                                                padding = padding,
                                                bias = bias
                                               )
    self.weight = torch.nn.Parameter(data = self.standard_convolution.weight, requires_grad = True)
    self.activation = activation

forward(x, sv_kernel_feature)

Forward pass for the spatially adaptive module.

Parameters:

  • x
                Input data tensor.
                Dimension: (1, C, H, W)
    
  • sv_kernel_feature
                Spatially varying kernel features.
                Dimension: (1, C_i * kernel_size * kernel_size, H, W)
    

Returns:

  • output ( tensor ) –

    Combined output tensor from standard and spatially adaptive convolutions. Dimension: (1, output_channels, H_out, W_out)

Source code in odak/learn/models/components.py
def forward(self, x, sv_kernel_feature):
    """
    Forward pass for the spatially adaptive module.

    Parameters
    ----------
    x                  : torch.tensor
                        Input data tensor.
                        Dimension: (1, C, H, W)
    sv_kernel_feature   : torch.tensor
                        Spatially varying kernel features.
                        Dimension: (1, C_i * kernel_size * kernel_size, H, W)

    Returns
    -------
    output             : torch.tensor
                        Combined output tensor from standard and spatially adaptive convolutions.
                        Dimension: (1, output_channels, H_out, W_out)
    """
    # Pad input and sv_kernel_feature if necessary
    if sv_kernel_feature.size(-1) * self.stride != x.size(-1) or sv_kernel_feature.size(
            -2) * self.stride != x.size(-2):
        diffY = sv_kernel_feature.size(-2) % self.stride
        diffX = sv_kernel_feature.size(-1) % self.stride
        sv_kernel_feature = torch.nn.functional.pad(sv_kernel_feature, (diffX // 2, diffX - diffX // 2,
                                                                        diffY // 2, diffY - diffY // 2))
        diffY = x.size(-2) % self.stride
        diffX = x.size(-1) % self.stride
        x = torch.nn.functional.pad(x, (diffX // 2, diffX - diffX // 2,
                                        diffY // 2, diffY - diffY // 2))

    # Unfold the input tensor for matrix multiplication
    input_feature = torch.nn.functional.unfold(
                                               x,
                                               kernel_size = (self.kernel_size, self.kernel_size),
                                               stride = self.stride,
                                               padding = self.padding
                                              )

    # Resize sv_kernel_feature to match the input feature
    sv_kernel = sv_kernel_feature.reshape(
                                          1,
                                          self.input_channels * self.kernel_size * self.kernel_size,
                                          (x.size(-2) // self.stride) * (x.size(-1) // self.stride)
                                         )

    # Apply sv_kernel to the input_feature
    sv_feature = input_feature * sv_kernel

    # Original spatially varying convolution output
    sv_output = torch.sum(sv_feature, dim = 1).reshape(
                                                        1,
                                                        1,
                                                        (x.size(-2) // self.stride),
                                                        (x.size(-1) // self.stride)
                                                       )

    # Reshape weight for spatially adaptive convolution
    si_kernel = self.weight.reshape(
                                    self.weight_output_channels,
                                    self.input_channels * self.kernel_size * self.kernel_size
                                   )

    # Apply si_kernel on sv convolution output
    sa_output = torch.matmul(si_kernel, sv_feature).reshape(
                                                            1, self.weight_output_channels,
                                                            (x.size(-2) // self.stride),
                                                            (x.size(-1) // self.stride)
                                                           )

    # Combine the outputs and apply activation function
    output = self.activation(torch.cat((sv_output, sa_output), dim = 1))
    return output
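
A shape-level usage sketch for the module, assuming it is importable from odak.learn.models. With output_channels = 2, one channel of the result comes from the purely spatially varying convolution (sv_output) and the remaining output_channels - 1 channels from the learned shared weight (sa_output):

import torch
from odak.learn.models import spatially_adaptive_module  # import path assumed

module = spatially_adaptive_module(input_channels = 2, output_channels = 2, kernel_size = 3)
x = torch.randn(1, 2, 16, 16)                           # (1, C, H, W)
sv_kernel_feature = torch.randn(1, 2 * 3 * 3, 16, 16)   # (1, C * k * k, H, W)
y = module(x, sv_kernel_feature)
print(y.shape)                                          # torch.Size([1, 2, 16, 16])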

spatially_adaptive_unet

Bases: Module

Spatially varying U-Net model based on spatially adaptive convolution.

References

Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.

Source code in odak/learn/models/models.py
class spatially_adaptive_unet(torch.nn.Module):
    """
    Spatially varying U-Net model based on spatially adaptive convolution.

    References
    ----------

    Chuanjun Zheng, Yicheng Zhan, Liang Shi, Ozan Cakmakci, and Kaan Akşit, "Focal Surface Holographic Light Transport using Learned Spatially Adaptive Convolutions," SIGGRAPH Asia 2024 Technical Communications (SA Technical Communications '24), December, 2024.
    """
    def __init__(
                 self,
                 depth=3,
                 dimensions=8,
                 input_channels=6,
                 out_channels=6,
                 kernel_size=3,
                 bias=True,
                 normalization=False,
                 activation=torch.nn.LeakyReLU(0.2, inplace=True)
                ):
        """
        U-Net model.

        Parameters
        ----------
        depth          : int
                         Number of upsampling and downsampling layers.
        dimensions     : int
                         Number of feature channels at the finest scale (doubled after each downsampling).
        input_channels : int
                         Number of input channels.
        out_channels   : int
                         Number of output channels.
        kernel_size    : int
                         Convolution kernel size.
        bias           : bool
                         Set to True to let convolutional layers learn a bias term.
        normalization  : bool
                         If True, adds a Batch Normalization layer after the convolutional layer.
        activation     : torch.nn
                         Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super().__init__()
        self.depth = depth
        self.out_channels = out_channels
        self.inc = convolution_layer(
                                     input_channels=input_channels,
                                     output_channels=dimensions,
                                     kernel_size=kernel_size,
                                     bias=bias,
                                     normalization=normalization,
                                     activation=activation
                                    )

        self.encoder = torch.nn.ModuleList()
        for i in range(self.depth + 1):  # Downsampling layers
            down_in_channels = dimensions * (2 ** i)
            down_out_channels = 2 * down_in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                                                          input_channels=down_in_channels,
                                                          mid_channels=down_in_channels,
                                                          output_channels=down_in_channels,
                                                          kernel_size=kernel_size,
                                                          bias=bias,
                                                          normalization=normalization,
                                                          activation=activation
                                                         )
            sam = spatially_adaptive_module(
                                            input_channels=down_in_channels,
                                            output_channels=down_out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            activation=activation
                                           )
            self.encoder.append(torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam]))
        self.global_feature_module = torch.nn.ModuleList()
        double_convolution_layer = double_convolution(
                                                      input_channels=dimensions * (2 ** (depth + 1)),
                                                      mid_channels=dimensions * (2 ** (depth + 1)),
                                                      output_channels=dimensions * (2 ** (depth + 1)),
                                                      kernel_size=kernel_size,
                                                      bias=bias,
                                                      normalization=normalization,
                                                      activation=activation
                                                     )
        global_feature_layer = global_feature_module(
                                                     input_channels=dimensions * (2 ** (depth + 1)),
                                                     mid_channels=dimensions * (2 ** (depth + 1)),
                                                     output_channels=dimensions * (2 ** (depth + 1)),
                                                     kernel_size=kernel_size,
                                                     bias=bias,
                                                     activation=torch.nn.LeakyReLU(0.2, inplace=True)
                                                    )
        self.global_feature_module.append(torch.nn.ModuleList([double_convolution_layer, global_feature_layer]))
        self.decoder = torch.nn.ModuleList()
        for i in range(depth, -1, -1):
            up_in_channels = dimensions * (2 ** (i + 1))
            up_mid_channels = up_in_channels // 2
            if i == 0:
                up_out_channels = self.out_channels
                upsample_layer = upsample_convtranspose2d_layer(
                                                                input_channels=up_in_channels,
                                                                output_channels=up_mid_channels,
                                                                kernel_size=2,
                                                                stride=2,
                                                                bias=bias,
                                                               )
                conv_layer = torch.nn.Sequential(
                    convolution_layer(
                                      input_channels=up_mid_channels,
                                      output_channels=up_mid_channels,
                                      kernel_size=kernel_size,
                                      bias=bias,
                                      normalization=normalization,
                                      activation=activation,
                                     ),
                    convolution_layer(
                                      input_channels=up_mid_channels,
                                      output_channels=up_out_channels,
                                      kernel_size=1,
                                      bias=bias,
                                      normalization=normalization,
                                      activation=None,
                                     )
                )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
            else:
                up_out_channels = up_in_channels // 2
                upsample_layer = upsample_convtranspose2d_layer(
                                                                input_channels=up_in_channels,
                                                                output_channels=up_mid_channels,
                                                                kernel_size=2,
                                                                stride=2,
                                                                bias=bias,
                                                               )
                conv_layer = double_convolution(
                                                input_channels=up_mid_channels,
                                                mid_channels=up_mid_channels,
                                                output_channels=up_out_channels,
                                                kernel_size=kernel_size,
                                                bias=bias,
                                                normalization=normalization,
                                                activation=activation,
                                               )
                self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))


    def forward(self, sv_kernel, field):
        """
        Forward model.

        Parameters
        ----------
        sv_kernel : list of torch.tensor
                    Learned spatially varying kernels.
                    Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                    where C_i, H_i, and W_i represent the channel, height, and width
                    of each feature at a certain scale.

        field     : torch.tensor
                    Input field data.
                    Dimension: (1, 6, H, W)

        Returns
        -------
        target_field : torch.tensor
                       Estimated output.
                       Dimension: (1, 6, H, W)
        """
        x = self.inc(field)
        downsampling_outputs = [x]
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer[0](downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
            sam_output = down_layer[2](x_down + down_layer[1](x_down), sv_kernel[self.depth - i])
            downsampling_outputs.append(sam_output)
        global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
        global_feature = self.global_feature_module[0][1](downsampling_outputs[-1], global_feature)
        downsampling_outputs.append(global_feature)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate(self.decoder):
            x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
            x_up = up_layer[1](x_up)
        result = x_up
        return result

__init__(depth=3, dimensions=8, input_channels=6, out_channels=6, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

U-Net model.

Parameters:

  • depth
             Number of upsampling and downsampling layers.
    
  • dimensions
             Number of feature channels at the finest scale (doubled after each downsampling).
    
  • input_channels (int, default: 6 ) –
             Number of input channels.
    
  • out_channels
             Number of output channels.
    
  • kernel_size
             Convolution kernel size.
    
  • bias
             Set to True to let convolutional layers learn a bias term.
    
  • normalization
             If True, adds a Batch Normalization layer after the convolutional layer.
    
  • activation
             Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    
Source code in odak/learn/models/models.py
def __init__(
             self,
             depth=3,
             dimensions=8,
             input_channels=6,
             out_channels=6,
             kernel_size=3,
             bias=True,
             normalization=False,
             activation=torch.nn.LeakyReLU(0.2, inplace=True)
            ):
    """
    U-Net model.

    Parameters
    ----------
    depth          : int
                     Number of upsampling and downsampling layers.
    dimensions     : int
                     Number of feature channels at the finest scale (doubled after each downsampling).
    input_channels : int
                     Number of input channels.
    out_channels   : int
                     Number of output channels.
    kernel_size    : int
                     Convolution kernel size.
    bias           : bool
                     Set to True to let convolutional layers learn a bias term.
    normalization  : bool
                     If True, adds a Batch Normalization layer after the convolutional layer.
    activation     : torch.nn
                     Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super().__init__()
    self.depth = depth
    self.out_channels = out_channels
    self.inc = convolution_layer(
                                 input_channels=input_channels,
                                 output_channels=dimensions,
                                 kernel_size=kernel_size,
                                 bias=bias,
                                 normalization=normalization,
                                 activation=activation
                                )

    self.encoder = torch.nn.ModuleList()
    for i in range(self.depth + 1):  # Downsampling layers
        down_in_channels = dimensions * (2 ** i)
        down_out_channels = 2 * down_in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
                                                      input_channels=down_in_channels,
                                                      mid_channels=down_in_channels,
                                                      output_channels=down_in_channels,
                                                      kernel_size=kernel_size,
                                                      bias=bias,
                                                      normalization=normalization,
                                                      activation=activation
                                                     )
        sam = spatially_adaptive_module(
                                        input_channels=down_in_channels,
                                        output_channels=down_out_channels,
                                        kernel_size=kernel_size,
                                        bias=bias,
                                        activation=activation
                                       )
        self.encoder.append(torch.nn.ModuleList([pooling_layer, double_convolution_layer, sam]))
    self.global_feature_module = torch.nn.ModuleList()
    double_convolution_layer = double_convolution(
                                                  input_channels=dimensions * (2 ** (depth + 1)),
                                                  mid_channels=dimensions * (2 ** (depth + 1)),
                                                  output_channels=dimensions * (2 ** (depth + 1)),
                                                  kernel_size=kernel_size,
                                                  bias=bias,
                                                  normalization=normalization,
                                                  activation=activation
                                                 )
    global_feature_layer = global_feature_module(
                                                 input_channels=dimensions * (2 ** (depth + 1)),
                                                 mid_channels=dimensions * (2 ** (depth + 1)),
                                                 output_channels=dimensions * (2 ** (depth + 1)),
                                                 kernel_size=kernel_size,
                                                 bias=bias,
                                                 activation=torch.nn.LeakyReLU(0.2, inplace=True)
                                                )
    self.global_feature_module.append(torch.nn.ModuleList([double_convolution_layer, global_feature_layer]))
    self.decoder = torch.nn.ModuleList()
    for i in range(depth, -1, -1):
        up_in_channels = dimensions * (2 ** (i + 1))
        up_mid_channels = up_in_channels // 2
        if i == 0:
            up_out_channels = self.out_channels
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels=up_in_channels,
                                                            output_channels=up_mid_channels,
                                                            kernel_size=2,
                                                            stride=2,
                                                            bias=bias,
                                                           )
            conv_layer = torch.nn.Sequential(
                convolution_layer(
                                  input_channels=up_mid_channels,
                                  output_channels=up_mid_channels,
                                  kernel_size=kernel_size,
                                  bias=bias,
                                  normalization=normalization,
                                  activation=activation,
                                 ),
                convolution_layer(
                                  input_channels=up_mid_channels,
                                  output_channels=up_out_channels,
                                  kernel_size=1,
                                  bias=bias,
                                  normalization=normalization,
                                  activation=None,
                                 )
            )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))
        else:
            up_out_channels = up_in_channels // 2
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels=up_in_channels,
                                                            output_channels=up_mid_channels,
                                                            kernel_size=2,
                                                            stride=2,
                                                            bias=bias,
                                                           )
            conv_layer = double_convolution(
                                            input_channels=up_mid_channels,
                                            mid_channels=up_mid_channels,
                                            output_channels=up_out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            normalization=normalization,
                                            activation=activation,
                                           )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))

forward(sv_kernel, field)

Forward model.

Parameters:

  • sv_kernel (list of torch.tensor) –
        Learned spatially varying kernels.
        Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
        where C_i, H_i, and W_i represent the channel, height, and width
        of each feature at a certain scale.
    
  • field (torch.tensor) –
        Input field data.
        Dimension: (1, 6, H, W)
    

Returns:

  • target_field ( tensor ) –

    Estimated output. Dimension: (1, 6, H, W)

Source code in odak/learn/models/models.py
def forward(self, sv_kernel, field):
    """
    Forward model.

    Parameters
    ----------
    sv_kernel : list of torch.tensor
                Learned spatially varying kernels.
                Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                where C_i, H_i, and W_i represent the channel, height, and width
                of each feature at a certain scale.

    field     : torch.tensor
                Input field data.
                Dimension: (1, 6, H, W)

    Returns
    -------
    target_field : torch.tensor
                   Estimated output.
                   Dimension: (1, 6, H, W)
    """
    x = self.inc(field)
    downsampling_outputs = [x]
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer[0](downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
        sam_output = down_layer[2](x_down + down_layer[1](x_down), sv_kernel[self.depth - i])
        downsampling_outputs.append(sam_output)
    global_feature = self.global_feature_module[0][0](downsampling_outputs[-1])
    global_feature = self.global_feature_module[0][1](downsampling_outputs[-1], global_feature)
    downsampling_outputs.append(global_feature)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate(self.decoder):
        x_up = up_layer[0](x_up, downsampling_outputs[2 * (self.depth - i)])
        x_up = up_layer[1](x_up)
    result = x_up
    return result
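
The SAM blocks that consume sv_kernel are defined elsewhere in odak.learn.models, but the kernel layout above, (1, C_i * kernel_size * kernel_size, H_i, W_i), is the standard layout for a spatially varying convolution. Below is a minimal, self-contained sketch of how a kernel in this layout can be applied to a feature map with torch.nn.functional.unfold; the shapes and variable names are illustrative and this is not the library's implementation.

import torch

batch, channels, height, width, kernel_size = 1, 8, 32, 32, 3
x = torch.rand(batch, channels, height, width)
# One kernel_size x kernel_size kernel per channel and per pixel.
sv_kernel = torch.rand(batch, channels * kernel_size * kernel_size, height, width)
# Gather the k x k neighborhood of every pixel: (1, C * k * k, H * W).
patches = torch.nn.functional.unfold(x, kernel_size, padding = kernel_size // 2)
weights = sv_kernel.flatten(start_dim = 2)  # (1, C * k * k, H * W)
# Weight each neighborhood sample and sum over the window, per channel.
out = (patches * weights).view(batch, channels, kernel_size ** 2, -1).sum(dim = 2)
out = out.view(batch, channels, height, width)
print(out.shape)  # torch.Size([1, 8, 32, 32])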

spatially_varying_kernel_generation_model

Bases: Module

A spatially varying kernel generation model revised from RSGUnet: https://github.com/MTLab/rsgunet_image_enhance.

Refer to: J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."

Source code in odak/learn/models/models.py
class spatially_varying_kernel_generation_model(torch.nn.Module):
    """
    A spatially varying kernel generation model revised from RSGUnet:
    https://github.com/MTLab/rsgunet_image_enhance.

    Refer to:
    J. Huang, P. Zhu, M. Geng et al., "Range Scaling Global U-Net for Perceptual Image Enhancement on Mobile Devices."
    """

    def __init__(
                 self,
                 depth = 3,
                 dimensions = 8,
                 input_channels = 7,
                 kernel_size = 3,
                 bias = True,
                 normalization = False,
                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                ):
        """
        U-Net-based spatially varying kernel generation model.

        Parameters
        ----------
        depth          : int
                         Number of upsampling and downsampling layers.
        dimensions     : int
                         Number of channels in the first convolutional layer; deeper layers scale this by powers of two.
        input_channels : int
                         Number of input channels.
        kernel_size    : int
                         Kernel size of the convolutional layers.
        bias           : bool
                         Set to True to let convolutional layers learn a bias term.
        normalization  : bool
                         If True, adds a Batch Normalization layer after the convolutional layer.
        activation     : torch.nn
                         Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super().__init__()
        self.depth = depth
        self.inc = convolution_layer(
                                     input_channels = input_channels,
                                     output_channels = dimensions,
                                     kernel_size = kernel_size,
                                     bias = bias,
                                     normalization = normalization,
                                     activation = activation
                                    )
        self.encoder = torch.nn.ModuleList()
        for i in range(depth + 1):  # downsampling layers
            if i == 0:
                in_channels = dimensions * (2 ** i)
                out_channels = dimensions * (2 ** i)
            elif i == depth:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = dimensions * (2 ** (i - 1))
            else:
                in_channels = dimensions * (2 ** (i - 1))
                out_channels = 2 * in_channels
            pooling_layer = torch.nn.AvgPool2d(2)
            double_convolution_layer = double_convolution(
                                                          input_channels = in_channels,
                                                          mid_channels = in_channels,
                                                          output_channels = out_channels,
                                                          kernel_size = kernel_size,
                                                          bias = bias,
                                                          normalization = normalization,
                                                          activation = activation
                                                         )
            self.encoder.append(pooling_layer)
            self.encoder.append(double_convolution_layer)
        self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
        for i in range(depth, -1, -1):
            if i == 1:
                svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
            else:
                svf_in_channels = 2 ** (self.depth + i) + 1
            svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
            svf_mid_channels = dimensions * (2 ** (self.depth - 1))
            spatially_varying_kernel_generation = torch.nn.ModuleList()
            for j in range(i, -1, -1):
                pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
                spatially_varying_kernel_generation.append(pooling_layer)
            kernel_generation_block = torch.nn.Sequential(
                torch.nn.Conv2d(
                                in_channels = svf_in_channels,
                                out_channels = svf_mid_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
                activation,
                torch.nn.Conv2d(
                                in_channels = svf_mid_channels,
                                out_channels = svf_mid_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
                activation,
                torch.nn.Conv2d(
                                in_channels = svf_mid_channels,
                                out_channels = svf_out_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               ),
            )
            spatially_varying_kernel_generation.append(kernel_generation_block)
            self.spatially_varying_feature.append(spatially_varying_kernel_generation)
        self.decoder = torch.nn.ModuleList()
        global_feature_layer = global_feature_module(  # global feature layer
                                                     input_channels = dimensions * (2 ** (depth - 1)),
                                                     mid_channels = dimensions * (2 ** (depth - 1)),
                                                     output_channels = dimensions * (2 ** (depth - 1)),
                                                     kernel_size = kernel_size,
                                                     bias = bias,
                                                     activation = torch.nn.LeakyReLU(0.2, inplace = True)
                                                    )
        self.decoder.append(global_feature_layer)
        for i in range(depth, 0, -1):
            if i == 2:
                up_in_channels = (dimensions // 2) * (2 ** i)
                up_out_channels = up_in_channels
                up_mid_channels = up_in_channels
            elif i == 1:
                up_in_channels = dimensions * 2
                up_out_channels = dimensions
                up_mid_channels = up_out_channels
            else:
                up_in_channels = (dimensions // 2) * (2 ** i)
                up_out_channels = up_in_channels // 2
                up_mid_channels = up_in_channels
            upsample_layer = upsample_convtranspose2d_layer(
                                                            input_channels = up_in_channels,
                                                            output_channels = up_mid_channels,
                                                            kernel_size = 2,
                                                            stride = 2,
                                                            bias = bias,
                                                           )
            conv_layer = double_convolution(
                                            input_channels = up_mid_channels,
                                            output_channels = up_out_channels,
                                            kernel_size = kernel_size,
                                            bias = bias,
                                            normalization = normalization,
                                            activation = activation,
                                           )
            self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))


    def forward(self, focal_surface, field):
        """
        Forward model.

        Parameters
        ----------
        focal_surface : torch.tensor
                        Input focal surface data.
                        Dimension: (1, 1, H, W)

        field         : torch.tensor
                        Input field data.
                        Dimension: (1, 6, H, W)

        Returns
        -------
        sv_kernel : list of torch.tensor
                    Learned spatially varying kernels.
                    Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                    where C_i, H_i, and W_i represent the channel, height, and width
                    of each feature at a certain scale.
        """
        x = self.inc(torch.cat((focal_surface, field), dim = 1))
        downsampling_outputs = [focal_surface]
        downsampling_outputs.append(x)
        for i, down_layer in enumerate(self.encoder):
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        sv_kernels = []
        for i, (up_layer, svf_layer) in enumerate(zip(self.decoder, self.spatially_varying_feature)):
            if i == 0:
                global_feature = up_layer(downsampling_outputs[-2], downsampling_outputs[-1])
                downsampling_outputs[-1] = global_feature
                sv_feature = [global_feature, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                sv_feature = [sv_feature[0], sv_feature[1], sv_feature[4], sv_feature[2],
                              sv_feature[3]]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
                sv_kernels.append(sv_kernel)
            else:
                x_up = up_layer[0](downsampling_outputs[-1],
                                   downsampling_outputs[2 * (self.depth + 1 - i) + 1])
                x_up = up_layer[1](x_up)
                downsampling_outputs[-1] = x_up
                sv_feature = [x_up, downsampling_outputs[0]]
                for j in range(self.depth - i + 1):
                    sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                    if j > 0:
                        sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
                if i == 1:
                    sv_feature = [sv_feature[0], sv_feature[1], sv_feature[3], sv_feature[2]]
                sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
                sv_kernels.append(sv_kernel)
        return sv_kernels

__init__(depth=3, dimensions=8, input_channels=7, kernel_size=3, bias=True, normalization=False, activation=torch.nn.LeakyReLU(0.2, inplace=True))

U-Net-based spatially varying kernel generation model.

Parameters:

  • depth (int, default: 3 ) –
             Number of upsampling and downsampling layers.

  • dimensions (int, default: 8 ) –
             Number of channels in the first convolutional layer; deeper layers scale this by powers of two.

  • input_channels (int, default: 7 ) –
             Number of input channels.

  • kernel_size (int, default: 3 ) –
             Kernel size of the convolutional layers.

  • bias (bool, default: True ) –
             Set to True to let convolutional layers learn a bias term.

  • normalization (bool, default: False ) –
             If True, adds a Batch Normalization layer after the convolutional layer.

  • activation (torch.nn, default: torch.nn.LeakyReLU(0.2, inplace=True) ) –
             Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
Source code in odak/learn/models/models.py
def __init__(
             self,
             depth = 3,
             dimensions = 8,
             input_channels = 7,
             kernel_size = 3,
             bias = True,
             normalization = False,
             activation = torch.nn.LeakyReLU(0.2, inplace = True)
            ):
    """
    U-Net-based spatially varying kernel generation model.

    Parameters
    ----------
    depth          : int
                     Number of upsampling and downsampling layers.
    dimensions     : int
                     Number of channels in the first convolutional layer; deeper layers scale this by powers of two.
    input_channels : int
                     Number of input channels.
    kernel_size    : int
                     Kernel size of the convolutional layers.
    bias           : bool
                     Set to True to let convolutional layers learn a bias term.
    normalization  : bool
                     If True, adds a Batch Normalization layer after the convolutional layer.
    activation     : torch.nn
                     Non-linear activation layer (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super().__init__()
    self.depth = depth
    self.inc = convolution_layer(
                                 input_channels = input_channels,
                                 output_channels = dimensions,
                                 kernel_size = kernel_size,
                                 bias = bias,
                                 normalization = normalization,
                                 activation = activation
                                )
    self.encoder = torch.nn.ModuleList()
    for i in range(depth + 1):  # downsampling layers
        if i == 0:
            in_channels = dimensions * (2 ** i)
            out_channels = dimensions * (2 ** i)
        elif i == depth:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = dimensions * (2 ** (i - 1))
        else:
            in_channels = dimensions * (2 ** (i - 1))
            out_channels = 2 * in_channels
        pooling_layer = torch.nn.AvgPool2d(2)
        double_convolution_layer = double_convolution(
                                                      input_channels = in_channels,
                                                      mid_channels = in_channels,
                                                      output_channels = out_channels,
                                                      kernel_size = kernel_size,
                                                      bias = bias,
                                                      normalization = normalization,
                                                      activation = activation
                                                     )
        self.encoder.append(pooling_layer)
        self.encoder.append(double_convolution_layer)
    self.spatially_varying_feature = torch.nn.ModuleList()  # for kernel generation
    for i in range(depth, -1, -1):
        if i == 1:
            svf_in_channels = dimensions + 2 ** (self.depth + i) + 1
        else:
            svf_in_channels = 2 ** (self.depth + i) + 1
        svf_out_channels = (2 ** (self.depth + i)) * (kernel_size * kernel_size)
        svf_mid_channels = dimensions * (2 ** (self.depth - 1))
        spatially_varying_kernel_generation = torch.nn.ModuleList()
        for j in range(i, -1, -1):
            pooling_layer = torch.nn.AvgPool2d(2 ** (j + 1))
            spatially_varying_kernel_generation.append(pooling_layer)
        kernel_generation_block = torch.nn.Sequential(
            torch.nn.Conv2d(
                            in_channels = svf_in_channels,
                            out_channels = svf_mid_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
            activation,
            torch.nn.Conv2d(
                            in_channels = svf_mid_channels,
                            out_channels = svf_mid_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
            activation,
            torch.nn.Conv2d(
                            in_channels = svf_mid_channels,
                            out_channels = svf_out_channels,
                            kernel_size = kernel_size,
                            padding = kernel_size // 2,
                            bias = bias
                           ),
        )
        spatially_varying_kernel_generation.append(kernel_generation_block)
        self.spatially_varying_feature.append(spatially_varying_kernel_generation)
    self.decoder = torch.nn.ModuleList()
    global_feature_layer = global_feature_module(  # global feature layer
                                                 input_channels = dimensions * (2 ** (depth - 1)),
                                                 mid_channels = dimensions * (2 ** (depth - 1)),
                                                 output_channels = dimensions * (2 ** (depth - 1)),
                                                 kernel_size = kernel_size,
                                                 bias = bias,
                                                 activation = torch.nn.LeakyReLU(0.2, inplace = True)
                                                )
    self.decoder.append(global_feature_layer)
    for i in range(depth, 0, -1):
        if i == 2:
            up_in_channels = (dimensions // 2) * (2 ** i)
            up_out_channels = up_in_channels
            up_mid_channels = up_in_channels
        elif i == 1:
            up_in_channels = dimensions * 2
            up_out_channels = dimensions
            up_mid_channels = up_out_channels
        else:
            up_in_channels = (dimensions // 2) * (2 ** i)
            up_out_channels = up_in_channels // 2
            up_mid_channels = up_in_channels
        upsample_layer = upsample_convtranspose2d_layer(
                                                        input_channels = up_in_channels,
                                                        output_channels = up_mid_channels,
                                                        kernel_size = 2,
                                                        stride = 2,
                                                        bias = bias,
                                                       )
        conv_layer = double_convolution(
                                        input_channels = up_mid_channels,
                                        output_channels = up_out_channels,
                                        kernel_size = kernel_size,
                                        bias = bias,
                                        normalization = normalization,
                                        activation = activation,
                                       )
        self.decoder.append(torch.nn.ModuleList([upsample_layer, conv_layer]))

forward(focal_surface, field)

Forward model.

Parameters:

  • focal_surface (torch.tensor) –
            Input focal surface data.
            Dimension: (1, 1, H, W)

  • field (torch.tensor) –
            Input field data.
            Dimension: (1, 6, H, W)
    

Returns:

  • sv_kernel ( list of torch.tensor ) –

    Learned spatially varying kernels. Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i), where C_i, H_i, and W_i represent the channel, height, and width of each feature at a certain scale.

Source code in odak/learn/models/models.py
def forward(self, focal_surface, field):
    """
    Forward model.

    Parameters
    ----------
    focal_surface : torch.tensor
                    Input focal surface data.
                    Dimension: (1, 1, H, W)

    field         : torch.tensor
                    Input field data.
                    Dimension: (1, 6, H, W)

    Returns
    -------
    sv_kernel : list of torch.tensor
                Learned spatially varying kernels.
                Dimension of each element in the list: (1, C_i * kernel_size * kernel_size, H_i, W_i),
                where C_i, H_i, and W_i represent the channel, height, and width
                of each feature at a certain scale.
    """
    x = self.inc(torch.cat((focal_surface, field), dim = 1))
    downsampling_outputs = [focal_surface]
    downsampling_outputs.append(x)
    for i, down_layer in enumerate(self.encoder):
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    sv_kernels = []
    for i, (up_layer, svf_layer) in enumerate(zip(self.decoder, self.spatially_varying_feature)):
        if i == 0:
            global_feature = up_layer(downsampling_outputs[-2], downsampling_outputs[-1])
            downsampling_outputs[-1] = global_feature
            sv_feature = [global_feature, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            sv_feature = [sv_feature[0], sv_feature[1], sv_feature[4], sv_feature[2],
                          sv_feature[3]]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
            sv_kernels.append(sv_kernel)
        else:
            x_up = up_layer[0](downsampling_outputs[-1],
                               downsampling_outputs[2 * (self.depth + 1 - i) + 1])
            x_up = up_layer[1](x_up)
            downsampling_outputs[-1] = x_up
            sv_feature = [x_up, downsampling_outputs[0]]
            for j in range(self.depth - i + 1):
                sv_feature[1] = svf_layer[self.depth - i](sv_feature[1])
                if j > 0:
                    sv_feature.append(svf_layer[j](downsampling_outputs[2 * j]))
            if i == 1:
                sv_feature = [sv_feature[0], sv_feature[1], sv_feature[3], sv_feature[2]]
            sv_kernel = svf_layer[-1](torch.cat(sv_feature, dim = 1))
            sv_kernels.append(sv_kernel)
    return sv_kernels
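
A minimal usage sketch, assuming odak is installed and the class is importable from odak.learn.models (the import path is an assumption); H and W are chosen so that every pooling stage divides evenly.

import torch
from odak.learn.models import spatially_varying_kernel_generation_model

model = spatially_varying_kernel_generation_model(depth = 3, dimensions = 8, input_channels = 7)
focal_surface = torch.rand(1, 1, 256, 256)  # (1, 1, H, W)
field = torch.rand(1, 6, 256, 256)          # (1, 6, H, W)
sv_kernels = model(focal_surface, field)
for sv_kernel in sv_kernels:
    print(sv_kernel.shape)  # one (1, C_i * 3 * 3, H_i, W_i) tensor per scale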

unet

Bases: Module

A U-Net model, heavily inspired by https://github.com/milesial/Pytorch-UNet/tree/master/unet; more can be read in Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.

Source code in odak/learn/models/models.py
class unet(torch.nn.Module):
    """
    A U-Net model, heavily inspired by `https://github.com/milesial/Pytorch-UNet/tree/master/unet`; more can be read in Ronneberger, Olaf, Philipp Fischer, and Thomas Brox. "U-net: Convolutional networks for biomedical image segmentation." Medical Image Computing and Computer-Assisted Intervention–MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer International Publishing, 2015.
    """

    def __init__(
                 self, 
                 depth = 4,
                 dimensions = 64, 
                 input_channels = 2, 
                 output_channels = 1, 
                 bilinear = False,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU(inplace = True),
                ):
        """
        U-Net model.

        Parameters
        ----------
        depth             : int
                            Number of upsampling and downsampling layers.
        dimensions        : int
                            Number of channels in the first convolutional layer; doubled at every downsampling stage.
        input_channels    : int
                            Number of input channels.
        output_channels   : int
                            Number of output channels.
        bilinear          : bool
                            Uses bilinear upsampling in upsampling layers when set True.
        kernel_size       : int
                            Kernel size of the convolutional layers.
        bias              : bool
                            Set to True to let convolutional layers learn a bias term.
        activation        : torch.nn
                            Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
        """
        super(unet, self).__init__()
        self.inc = double_convolution(
                                      input_channels = input_channels,
                                      mid_channels = dimensions,
                                      output_channels = dimensions,
                                      kernel_size = kernel_size,
                                      bias = bias,
                                      activation = activation
                                     )      

        self.downsampling_layers = torch.nn.ModuleList()
        self.upsampling_layers = torch.nn.ModuleList()
        for i in range(depth): # downsampling layers
            in_channels = dimensions * (2 ** i)
            out_channels = dimensions * (2 ** (i + 1))
            down_layer = downsample_layer(in_channels,
                                            out_channels,
                                            kernel_size=kernel_size,
                                            bias=bias,
                                            activation=activation
                                            )
            self.downsampling_layers.append(down_layer)      

        for i in range(depth - 1, -1, -1):  # upsampling layers
            up_in_channels = dimensions * (2 ** (i + 1))  
            up_out_channels = dimensions * (2 ** i) 
            up_layer = upsample_layer(up_in_channels, up_out_channels, kernel_size=kernel_size, bias=bias, activation=activation, bilinear=bilinear)
            self.upsampling_layers.append(up_layer)
        self.outc = torch.nn.Conv2d(
                                    dimensions, 
                                    output_channels,
                                    kernel_size = kernel_size,
                                    padding = kernel_size // 2,
                                    bias = bias
                                   )


    def forward(self, x):
        """
        Forward model.

        Parameters
        ----------
        x             : torch.tensor
                        Input data.


        Returns
        -------
        result        : torch.tensor
                        Estimated output.
        """
        downsampling_outputs = [self.inc(x)]
        for down_layer in self.downsampling_layers:
            x_down = down_layer(downsampling_outputs[-1])
            downsampling_outputs.append(x_down)
        x_up = downsampling_outputs[-1]
        for i, up_layer in enumerate((self.upsampling_layers)):
            x_up = up_layer(x_up, downsampling_outputs[-(i + 2)])       
        result = self.outc(x_up)
        return result

__init__(depth=4, dimensions=64, input_channels=2, output_channels=1, bilinear=False, kernel_size=3, bias=False, activation=torch.nn.ReLU(inplace=True))

U-Net model.

Parameters:

  • depth (int, default: 4 ) –
                Number of upsampling and downsampling layers.

  • dimensions (int, default: 64 ) –
                Number of channels in the first convolutional layer; doubled at every downsampling stage.

  • input_channels (int, default: 2 ) –
                Number of input channels.

  • output_channels (int, default: 1 ) –
                Number of output channels.

  • bilinear (bool, default: False ) –
                Uses bilinear upsampling in upsampling layers when set True.

  • kernel_size (int, default: 3 ) –
                Kernel size of the convolutional layers.

  • bias (bool, default: False ) –
                Set to True to let convolutional layers learn a bias term.

  • activation (torch.nn, default: torch.nn.ReLU(inplace=True) ) –
                Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    
Source code in odak/learn/models/models.py
def __init__(
             self, 
             depth = 4,
             dimensions = 64, 
             input_channels = 2, 
             output_channels = 1, 
             bilinear = False,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU(inplace = True),
            ):
    """
    U-Net model.

    Parameters
    ----------
    depth             : int
                        Number of upsampling and downsampling layers.
    dimensions        : int
                        Number of channels in the first convolutional layer; doubled at every downsampling stage.
    input_channels    : int
                        Number of input channels.
    output_channels   : int
                        Number of output channels.
    bilinear          : bool
                        Uses bilinear upsampling in upsampling layers when set True.
    kernel_size       : int
                        Kernel size of the convolutional layers.
    bias              : bool
                        Set to True to let convolutional layers learn a bias term.
    activation        : torch.nn
                        Non-linear activation layer to be used (e.g., torch.nn.ReLU(), torch.nn.Sigmoid()).
    """
    super(unet, self).__init__()
    self.inc = double_convolution(
                                  input_channels = input_channels,
                                  mid_channels = dimensions,
                                  output_channels = dimensions,
                                  kernel_size = kernel_size,
                                  bias = bias,
                                  activation = activation
                                 )      

    self.downsampling_layers = torch.nn.ModuleList()
    self.upsampling_layers = torch.nn.ModuleList()
    for i in range(depth): # downsampling layers
        in_channels = dimensions * (2 ** i)
        out_channels = dimensions * (2 ** (i + 1))
        down_layer = downsample_layer(in_channels,
                                        out_channels,
                                        kernel_size=kernel_size,
                                        bias=bias,
                                        activation=activation
                                        )
        self.downsampling_layers.append(down_layer)      

    for i in range(depth - 1, -1, -1):  # upsampling layers
        up_in_channels = dimensions * (2 ** (i + 1))  
        up_out_channels = dimensions * (2 ** i) 
        up_layer = upsample_layer(up_in_channels, up_out_channels, kernel_size=kernel_size, bias=bias, activation=activation, bilinear=bilinear)
        self.upsampling_layers.append(up_layer)
    self.outc = torch.nn.Conv2d(
                                dimensions, 
                                output_channels,
                                kernel_size = kernel_size,
                                padding = kernel_size // 2,
                                bias = bias
                               )

forward(x)

Forward model.

Parameters:

  • x (torch.tensor) –
            Input data.
    

Returns:

  • result ( tensor ) –

    Estimated output.

Source code in odak/learn/models/models.py
def forward(self, x):
    """
    Forward model.

    Parameters
    ----------
    x             : torch.tensor
                    Input data.


    Returns
    -------
    result        : torch.tensor
                    Estimated output.
    """
    downsampling_outputs = [self.inc(x)]
    for down_layer in self.downsampling_layers:
        x_down = down_layer(downsampling_outputs[-1])
        downsampling_outputs.append(x_down)
    x_up = downsampling_outputs[-1]
    for i, up_layer in enumerate((self.upsampling_layers)):
        x_up = up_layer(x_up, downsampling_outputs[-(i + 2)])       
    result = self.outc(x_up)
    return result
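
A minimal usage sketch, assuming odak is installed and unet is importable from odak.learn.models; input height and width should be divisible by 2 ** depth so the skip connections align.

import torch
from odak.learn.models import unet

model = unet(depth = 4, dimensions = 64, input_channels = 2, output_channels = 1)
x = torch.rand(1, 2, 256, 256)  # 256 is divisible by 2 ** 4
y = model(x)
print(y.shape)  # torch.Size([1, 1, 256, 256])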

upsample_convtranspose2d_layer

Bases: Module

An upsampling convtranspose2d layer.

Source code in odak/learn/models/components.py
class upsample_convtranspose2d_layer(torch.nn.Module):
    """
    An upsampling convtranspose2d layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 2,
                 stride = 2,
                 bias = False,
                ):
        """
        An upsampling component using a transposed convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        stride          : int
                          Stride of the transposed convolution.
        bias            : bool
                          Set to True to let convolutional layers learn a bias term.
        """
        super().__init__()
        self.up = torch.nn.ConvTranspose2d(
                                           in_channels = input_channels,
                                           out_channels = output_channels,
                                           bias = bias,
                                           kernel_size = kernel_size,
                                           stride = stride
                                          )

    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation.
        """
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        result = x1 + x2
        return result

__init__(input_channels, output_channels, kernel_size=2, stride=2, bias=False)

An upsampling component using a transposed convolution.

Parameters:

  • input_channels (int) –
              Number of input channels.

  • output_channels (int) –
              Number of output channels.

  • kernel_size (int, default: 2 ) –
              Kernel size.

  • stride (int, default: 2 ) –
              Stride of the transposed convolution.

  • bias (bool, default: False ) –
              Set to True to let convolutional layers learn a bias term.
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 2,
             stride = 2,
             bias = False,
            ):
    """
    An upsampling component using a transposed convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    stride          : int
                      Stride of the transposed convolution.
    bias            : bool
                      Set to True to let convolutional layers learn a bias term.
    """
    super().__init__()
    self.up = torch.nn.ConvTranspose2d(
                                       in_channels = input_channels,
                                       out_channels = output_channels,
                                       bias = bias,
                                       kernel_size = kernel_size,
                                       stride = stride
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1 (torch.tensor) –
             First input data.

  • x2 (torch.tensor) –
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Result of the forward operation.
    """
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    result = x1 + x2
    return result
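
The padding in forward compensates for odd-sized skip connections: after the stride-two transposed convolution, x1 is padded on each side until it matches x2, and the two are summed. A small demonstration of that shape logic in plain torch; the channel counts and sizes are arbitrary.

import torch

up = torch.nn.ConvTranspose2d(in_channels = 16, out_channels = 8, kernel_size = 2, stride = 2)
x1 = torch.rand(1, 16, 32, 32)  # coarse decoder feature
x2 = torch.rand(1, 8, 65, 65)   # odd-sized skip connection
y = up(x1)                      # (1, 8, 64, 64)
diffY = x2.size(2) - y.size(2)
diffX = x2.size(3) - y.size(3)
# Pad left/right and top/bottom so that y matches x2 exactly.
y = torch.nn.functional.pad(y, [diffX // 2, diffX - diffX // 2,
                                diffY // 2, diffY - diffY // 2])
print((y + x2).shape)  # torch.Size([1, 8, 65, 65])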

upsample_layer

Bases: Module

An upsampling convolutional layer.

Source code in odak/learn/models/components.py
class upsample_layer(torch.nn.Module):
    """
    An upsampling convolutional layer.
    """
    def __init__(
                 self,
                 input_channels,
                 output_channels,
                 kernel_size = 3,
                 bias = False,
                 activation = torch.nn.ReLU(),
                 bilinear = True
                ):
        """
        An upscaling component with a double convolution.

        Parameters
        ----------
        input_channels  : int
                          Number of input channels.
        output_channels : int
                          Number of output channels.
        kernel_size     : int
                          Kernel size.
        bias            : bool
                          Set to True to let convolutional layers learn a bias term.
        activation      : torch.nn
                          Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
        bilinear        : bool
                          If set to True, bilinear sampling is used.
        """
        super(upsample_layer, self).__init__()
        if bilinear:
            self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
            self.conv = double_convolution(
                                           input_channels = input_channels + output_channels,
                                           mid_channels = input_channels // 2,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )
        else:
            self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
            self.conv = double_convolution(
                                           input_channels = input_channels,
                                           mid_channels = output_channels,
                                           output_channels = output_channels,
                                           kernel_size = kernel_size,
                                           bias = bias,
                                           activation = activation
                                          )


    def forward(self, x1, x2):
        """
        Forward model.

        Parameters
        ----------
        x1             : torch.tensor
                         First input data.
        x2             : torch.tensor
                         Second input data.


        Returns
        -------
        result        : torch.tensor
                        Result of the forward operation.
        """ 
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]
        x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                          diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim = 1)
        result = self.conv(x)
        return result

__init__(input_channels, output_channels, kernel_size=3, bias=False, activation=torch.nn.ReLU(), bilinear=True)

An upscaling component with a double convolution.

Parameters:

  • input_channels (int) –
              Number of input channels.

  • output_channels (int) –
              Number of output channels.

  • kernel_size (int, default: 3 ) –
              Kernel size.

  • bias (bool, default: False ) –
              Set to True to let convolutional layers learn a bias term.

  • activation (torch.nn, default: torch.nn.ReLU() ) –
              Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().

  • bilinear (bool, default: True ) –
              If set to True, bilinear sampling is used.
    
Source code in odak/learn/models/components.py
def __init__(
             self,
             input_channels,
             output_channels,
             kernel_size = 3,
             bias = False,
             activation = torch.nn.ReLU(),
             bilinear = True
            ):
    """
    An upscaling component with a double convolution.

    Parameters
    ----------
    input_channels  : int
                      Number of input channels.
    output_channels : int
                      Number of output channels.
    kernel_size     : int
                      Kernel size.
    bias            : bool
                      Set to True to let convolutional layers learn a bias term.
    activation      : torch.nn
                      Nonlinear activation layer to be used. If None, uses torch.nn.ReLU().
    bilinear        : bool
                      If set to True, bilinear sampling is used.
    """
    super(upsample_layer, self).__init__()
    if bilinear:
        self.up = torch.nn.Upsample(scale_factor = 2, mode = 'bilinear', align_corners = True)
        self.conv = double_convolution(
                                       input_channels = input_channels + output_channels,
                                       mid_channels = input_channels // 2,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )
    else:
        self.up = torch.nn.ConvTranspose2d(input_channels , input_channels // 2, kernel_size = 2, stride = 2)
        self.conv = double_convolution(
                                       input_channels = input_channels,
                                       mid_channels = output_channels,
                                       output_channels = output_channels,
                                       kernel_size = kernel_size,
                                       bias = bias,
                                       activation = activation
                                      )

forward(x1, x2)

Forward model.

Parameters:

  • x1 (torch.tensor) –
             First input data.

  • x2 (torch.tensor) –
             Second input data.
    

Returns:

  • result ( tensor ) –

    Result of the forward operation.

Source code in odak/learn/models/components.py
def forward(self, x1, x2):
    """
    Forward model.

    Parameters
    ----------
    x1             : torch.tensor
                     First input data.
    x2             : torch.tensor
                     Second input data.


    Returns
    -------
    result        : torch.tensor
                    Result of the forward operation.
    """ 
    x1 = self.up(x1)
    diffY = x2.size()[2] - x1.size()[2]
    diffX = x2.size()[3] - x1.size()[3]
    x1 = torch.nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2,
                                      diffY // 2, diffY - diffY // 2])
    x = torch.cat([x2, x1], dim = 1)
    result = self.conv(x)
    return result
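
A minimal usage sketch, assuming the component is importable from odak.learn.models (the import path is an assumption). With bilinear = True, x1 is upsampled by a factor of two and concatenated with the skip connection x2, so the double convolution receives input_channels + output_channels channels.

import torch
from odak.learn.models import upsample_layer

layer = upsample_layer(input_channels = 128, output_channels = 64, bilinear = True)
x1 = torch.rand(1, 128, 32, 32)  # coarse decoder feature
x2 = torch.rand(1, 64, 64, 64)   # encoder skip connection
result = layer(x1, x2)
print(result.shape)  # torch.Size([1, 64, 64, 64])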

gaussian(x, multiplier=1.0)

A Gaussian non-linear activation. For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

Parameters:

  • x (float or torch.tensor) –
           Input data.

  • multiplier (float or torch.tensor, default: 1.0 ) –
           Multiplier.
    

Returns:

  • result ( float or tensor ) –

    Output data.

Source code in odak/learn/models/components.py
def gaussian(x, multiplier = 1.):
    """
    A Gaussian non-linear activation.
    For more details: Ramasinghe, Sameera, and Simon Lucey. "Beyond periodicity: Towards a unifying framework for activations in coordinate-mlps." In European Conference on Computer Vision, pp. 142-158. Cham: Springer Nature Switzerland, 2022.

    Parameters
    ----------
    x            : float or torch.tensor
                   Input data.
    multiplier   : float or torch.tensor
                   Multiplier.

    Returns
    -------
    result       : float or torch.tensor
                   Output data.
    """
    result = torch.exp(- (multiplier * x) ** 2)
    return result
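
A quick numerical check of the definition exp(-(multiplier * x) ** 2), assuming gaussian is importable from odak.learn.models (the import path is an assumption): the activation peaks at 1 for x = 0 and decays symmetrically, and a larger multiplier narrows the bump.

import torch
from odak.learn.models import gaussian  # import path is an assumption

x = torch.linspace(-2., 2., steps = 5)
print(gaussian(x))                   # equals 1.0 at x = 0
print(gaussian(x, multiplier = 2.))  # narrower bump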

swish(x)

A swish non-linear activation. For more details: https://en.wikipedia.org/wiki/Swish_function

Parameters:

  • x (float or torch.tensor) –
             Input.
    

Returns:

  • out ( float or tensor ) –

    Output.

Source code in odak/learn/models/components.py
def swish(x):
    """
    A swish non-linear activation.
    For more details: https://en.wikipedia.org/wiki/Swish_function

    Parameters
    ----------
    x              : float or torch.tensor
                     Input.

    Returns
    -------
    out            : float or torch.tensor
                     Output.
    """
    out = x * torch.sigmoid(x)
    return out
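
Swish as defined here, x * sigmoid(x), coincides with SiLU (swish with a unit scale), so the output matches torch.nn.functional.silu. A quick check, assuming swish is importable from odak.learn.models (the import path is an assumption).

import torch
from odak.learn.models import swish  # import path is an assumption

x = torch.linspace(-4., 4., steps = 9)
print(torch.allclose(swish(x), torch.nn.functional.silu(x)))  # True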