Source code for shadow.utils

import random
import torch
import torch.utils.data
import numpy as np


def flatten_to_two_dim(input_tensor):
    r"""Flatten all dimensions after the first ([2, 3, 4] -> [2, 12]).

    Args:
        input_tensor (torch.Tensor): input tensor

    Returns:
        torch.Tensor: `input_tensor` flattened to two dimensions, preserving the first axis
    """
    return input_tensor.view(input_tensor.shape[0], -1)
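A minimal usage sketch (not part of the module source): flattening a batch of 3-D feature maps to 2-D while keeping the batch dimension.

    >>> x = torch.rand(2, 3, 4)
    >>> flatten_to_two_dim(x).shape
    torch.Size([2, 12])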
def set_seed(seed, cudnn_deterministic=False):
    r"""Set the seeds for maximum reproducibility.

    Sets seeds for `random`, `numpy`, and `torch` to `seed`, and can also enable
    deterministic mode for the CuDNN backend. This does not guarantee full
    reproducibility, as some underlying operations (e.g. `atomicAdd`) still have
    sources of non-determinism that cannot be disabled.

    Args:
        seed (int): Seed used for `random`, `numpy`, and `torch`.
        cudnn_deterministic (bool, optional): Sets the CuDNN backend into
            deterministic mode. This can negatively impact performance.
            Defaults to False.

    .. note:: PyTorch provides only minimal guarantees on reproducibility. See
        <https://pytorch.org/docs/stable/notes/randomness.html> for more information.
    """
    # Python seeding
    random.seed(seed)
    # Numpy seeding
    np.random.seed(seed)
    # Torch seeding
    torch.manual_seed(seed)
    # CuDNN deterministic seeding - can impact performance
    if cudnn_deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
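A hedged usage sketch (the seed value is illustrative): call this once before building models or data loaders so that weight initialization and shuffling are repeatable.

    >>> set_seed(42, cudnn_deterministic=True)
    >>> torch.rand(2)       # same values on every run with the same seed
    >>> np.random.rand(2)   # numpy and Python `random` are seeded as well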
class _CWScheduler(object):
    """Base class for consistency weight schedulers.

    The base class for consistency weight schedulers. This base should not be
    instantiated directly. Consistency weight schedulers should implement `_make_ramp`.
    """
    def __init__(self):
        super(_CWScheduler, self).__init__()
        self._make_ramp()
        self.it = 0

    def __call__(self):
        r"""The current consistency weight.

        Returns:
            float: The current scheduled consistency weight.
        """
        try:
            return self.ramp[self.it]
        except IndexError:
            return self.ramp[-1]

    def step(self, increment=1):
        r"""Update the scheduler to the next step.

        Args:
            increment (int, optional): Number of steps to take. Defaults to 1.
        """
        self.it += increment
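To illustrate the base-class behavior (a sketch, not part of the original module): `__call__` indexes into `self.ramp` at the current step, and once the step count runs past the end of the ramp the last value is returned, so the weight saturates instead of raising an `IndexError`. The tiny subclass below is hypothetical.

    >>> class _ToyCW(_CWScheduler):
    ...     def _make_ramp(self):
    ...         self.ramp = [0.0, 0.5, 1.0]
    >>> w = _ToyCW()
    >>> w()
    0.0
    >>> w.step(2)
    >>> w()
    1.0
    >>> w.step(10)
    >>> w()          # held at the final ramp value
    1.0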
class ConstantCW(_CWScheduler):
    r"""Constant valued consistency weight scheduler.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a constant weighting function that does not change.

    Args:
        last_weight (float, optional): Final consistency weight. Defaults to 1.

    Example:
        >>> alpha = ConstantCW(last_weight)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_weight=1):
        self.last_weight = last_weight
        super(ConstantCW, self).__init__()

    def _make_ramp(self):
        self.ramp = [self.last_weight]
class SigmoidCW(_CWScheduler):
    r"""Sigmoidal consistency weight scheduler.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a sigmoidal weighting function.

    Args:
        last_epoch (int): Number of epochs until scheduler reaches `last_weight`.
        last_weight (float, optional): Final consistency weight. Defaults to 1.
        first_weight (float, optional): Consistency weight at beginning of ramp.
            Defaults to 0.
        epochs_before (int, optional): Number of epochs to hold weight at
            `first_weight` before beginning ramp. Defaults to 0.

    Example:
        >>> alpha = SigmoidCW(last_epoch, last_weight, first_weight, epochs_before)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_epoch, last_weight=1, first_weight=0, epochs_before=0):
        self.last_epoch = last_epoch
        self.last_weight = last_weight
        self.first_weight = first_weight
        self.epochs_before = epochs_before
        super(SigmoidCW, self).__init__()

    def _make_ramp(self):
        # Calculate a range of sigmoid values
        sigmoid_steps = 1 / (1 + np.exp(-np.linspace(-10, 10, self.last_epoch)))
        # Scale the sigmoid from first_weight to last_weight
        sigmoid_steps = sigmoid_steps * (self.last_weight - self.first_weight) + self.first_weight
        # Keep weight fixed initially
        self.ramp = [self.first_weight] * self.epochs_before + sigmoid_steps.tolist()
class RampCW(_CWScheduler):
    r"""Linear ramp consistency weight scheduler.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a linear ramp weighting function.

    Args:
        last_epoch (int): Number of epochs until scheduler reaches `last_weight`.
        last_weight (float, optional): Final consistency weight. Defaults to 1.
        first_weight (float, optional): Consistency weight at beginning of ramp.
            Defaults to 0.
        epochs_before (int, optional): Number of epochs to hold weight at
            `first_weight` before beginning ramp. Defaults to 0.

    Example:
        >>> alpha = RampCW(last_epoch, last_weight, first_weight, epochs_before)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_epoch, last_weight=1, first_weight=0, epochs_before=0):
        self.last_epoch = last_epoch
        self.last_weight = last_weight
        self.first_weight = first_weight
        self.epochs_before = epochs_before
        super(RampCW, self).__init__()

    def _make_ramp(self):
        ramp_steps = np.linspace(self.first_weight, self.last_weight, self.last_epoch)
        # Keep weight fixed initially
        self.ramp = [self.first_weight] * self.epochs_before + ramp_steps.tolist()
class StepCW(_CWScheduler):
    r"""Step function consistency weight scheduler.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a step weighting function.

    Args:
        last_epoch (int): Number of epochs until scheduler reaches `last_weight`.
        last_weight (float, optional): Final consistency weight. Defaults to 1.
        first_weight (float, optional): Consistency weight at beginning of ramp.
            Defaults to 0.

    Example:
        >>> alpha = StepCW(last_epoch, last_weight, first_weight)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_epoch, last_weight=1, first_weight=0):
        self.last_epoch = last_epoch
        self.last_weight = last_weight
        self.first_weight = first_weight
        super(StepCW, self).__init__()

    def _make_ramp(self):
        self.ramp = [self.first_weight] * self.last_epoch + [self.last_weight]
class QuadraticCW(_CWScheduler):
    r"""Quadratic consistency weight scheduler.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a quadratic weighting function.

    Args:
        last_epoch (int): Number of epochs until scheduler reaches `last_weight`.
        last_weight (float, optional): Final consistency weight. Defaults to 1.
        first_weight (float, optional): Consistency weight at beginning of ramp.
            Defaults to 0.
        epochs_before (int, optional): Number of epochs to hold weight at
            `first_weight` before beginning ramp. Defaults to 0.

    Example:
        >>> alpha = QuadraticCW(last_epoch, last_weight, first_weight, epochs_before)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_epoch, last_weight=1, first_weight=0, epochs_before=0):
        self.last_epoch = last_epoch
        self.last_weight = last_weight
        self.first_weight = first_weight
        self.epochs_before = epochs_before
        super(QuadraticCW, self).__init__()

    def _make_ramp(self):
        steps = -(np.linspace(-1, 0, self.last_epoch) ** 2) + 1
        # Scale the steps from first_weight to last_weight
        steps = steps * (self.last_weight - self.first_weight) + self.first_weight
        # Keep weight fixed initially
        self.ramp = [self.first_weight] * self.epochs_before + steps.tolist()
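A short comparison sketch (illustrative only): printing the precomputed `ramp` lists shows how the shapes differ. `RampCW` increases linearly, `QuadraticCW` rises quickly and then flattens, and `SigmoidCW` follows an S-curve centered on the middle epoch.

    >>> for cls in (RampCW, QuadraticCW, SigmoidCW):
    ...     sched = cls(last_epoch=5)
    ...     print(cls.__name__, [round(v, 3) for v in sched.ramp])
    RampCW [0.0, 0.25, 0.5, 0.75, 1.0]
    QuadraticCW [0.0, 0.438, 0.75, 0.938, 1.0]
    SigmoidCW [0.0, 0.007, 0.5, 0.993, 1.0]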
def init_model_weights(model, value):
    r"""Set all weights in model to a given value.

    Args:
        model (torch.nn.Module): The model to update. Weight update is performed in place.
        value (float): The weight value.
    """
    def init_weights(m):
        try:
            m.weight.data.fill_(value)  # Will throw an AttributeError if this layer type has no weight field
        except AttributeError:
            pass

    model.apply(init_weights)
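A small illustrative sketch: only `weight` tensors are filled (biases are untouched), and layers without a `weight` attribute, such as activations, are silently skipped.

    >>> net = torch.nn.Sequential(torch.nn.Linear(4, 2), torch.nn.ReLU(), torch.nn.Linear(2, 1))
    >>> init_model_weights(net, 0.5)
    >>> net[0].weight.data.unique()
    tensor([0.5000])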
def _print_model_parameters(model):
    r"""Print the names and values of all parameters in the network.

    Args:
        model (torch.nn.Module): The model to report information about.

    Returns:
        No return value
    """
    for name, param in model.named_parameters():
        print(name)
        print(param.data)
class SkewedSigmoidCW(_CWScheduler):
    r"""Skewed sigmoidal consistency weight scheduler with variable ramp-up speed.

    Scheduler function to control a weight, often used to weigh a consistency cost
    relative to a supervised learning cost (e.g. Cross Entropy). This is intended
    to be stepped after each epoch during training to increase or decrease the
    weight accordingly. This provides a skewed sigmoid weighting function with
    variable ramp-up speed and timing.

    Args:
        last_epoch (int): Number of epochs until scheduler reaches `last_weight`.
        last_weight (float, optional): Final consistency weight. Defaults to 1.
        first_weight (float, optional): Consistency weight at beginning of ramp.
            Defaults to 0.
        epochs_before (int, optional): Number of epochs to hold weight at
            `first_weight` before beginning ramp. Defaults to 0.
        beta (float, optional): Controls how sharp the rise from `first_weight` to
            `last_weight` is. `beta` = 1 corresponds to a standard sigmoid.
            Increasing `beta` increases sharpness. Negative values invert the
            sigmoid, producing a decreasing ramp. Defaults to 1.
        zeta (float, optional): Skews when the rise from `first_weight` to
            `last_weight` occurs. `zeta` = 1 corresponds to a rise centered about
            the middle epoch. `zeta` = 0 corresponds to a flat weight at
            `last_weight`. `zeta` < 1 shifts the rise to earlier epochs.
            `zeta` > 1 shifts it to later epochs. Defaults to 1.

    Example:
        >>> alpha = SkewedSigmoidCW(last_epoch, last_weight, first_weight, epochs_before, beta, zeta)
        >>> for epoch in epochs:
        >>>     train(...)
        >>>     loss = criterion + alpha() * consistency
        >>>     validate(...)
        >>>     alpha.step()
    """
    def __init__(self, last_epoch, last_weight=1, first_weight=0, epochs_before=0, beta=1, zeta=1):
        self.last_epoch = last_epoch
        self.last_weight = last_weight
        self.first_weight = first_weight
        self.epochs_before = epochs_before
        self.beta = beta
        self.zeta = zeta
        super(SkewedSigmoidCW, self).__init__()

    def _make_ramp(self):
        # Calculate a range of sigmoid values
        x = np.linspace(-10, 10, self.last_epoch, endpoint=True)
        # Calculate the linear sampling grid along the normal sigmoid
        i = np.linspace(0, 1, self.last_epoch, endpoint=True)
        # Skew the sampling grid to sample nonlinearly along the sigmoid
        x = i**self.zeta * 2 * max(x) + min(x)
        # Calculate the sigmoid along the nonlinear axis
        sigmoid_steps = 1.0 / (1 + np.exp(-self.beta * (x)))
        # Scale the sigmoid from first_weight to last_weight
        sigmoid_steps = sigmoid_steps * (self.last_weight - self.first_weight) + self.first_weight
        # Keep weight fixed initially
        self.ramp = [self.first_weight] * self.epochs_before + sigmoid_steps.tolist()
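A brief sketch of the `zeta` skew (the parameter values are illustrative): with `zeta < 1` the sampling grid is pushed toward the right tail of the sigmoid, so the ramp rises within the first epochs, while `zeta > 1` delays the rise toward the final epochs.

    >>> early = SkewedSigmoidCW(last_epoch=100, zeta=0.5)   # rise shifted toward earlier epochs
    >>> late = SkewedSigmoidCW(last_epoch=100, zeta=2.0)    # rise shifted toward later epochs
    >>> early.ramp[25] > 0.5, late.ramp[25] < 0.01          # a quarter of the way through training
    (True, True)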
class IgnoreUnlabeledWrapper(torch.nn.Module):
    r"""Wraps a loss function to filter out missing values for a semi-supervised learning task.

    Args:
        criterion (callable): Used to compute the supervised loss.
        ignore_index (bool, int, float, complex, optional): Specifies a target value
            that is ignored and does not contribute to the input gradient.
            Defaults to negative infinity.

    Example:
        >>> ssml_loss = IgnoreUnlabeledWrapper(criterion=torch.nn.MSELoss())
        >>> y_true = torch.rand(3, 1)
        >>> y_hat = y_true.clone()
        >>> y_hat
        tensor([[0.1543],
                [0.1572],
                [0.0404]])
        >>> ssml_loss(y_hat, y_true)
        tensor(0.)
        >>> y_true[1] = np.NINF
        >>> y_true
        tensor([[0.1543],
                [  -inf],
                [0.0404]])
        >>> ssml_loss(y_hat, y_true)
        tensor(0.)

    Example:
        >>> ssml_loss = IgnoreUnlabeledWrapper(criterion=torch.nn.BCELoss())
        >>> y_hat = torch.Tensor([[0], [1], [1], [0]])
        >>> y_true = torch.Tensor([[ignore_index], [1], [ignore_index], [1]])
        >>> ssml_loss(y_hat, y_true)
        tensor(50.)
    """
    def __init__(self, criterion, ignore_index=np.NINF):
        super(IgnoreUnlabeledWrapper, self).__init__()
        self.criterion = criterion
        self.ignore_index = ignore_index
    def forward(self, y_hat, y_true):
        # If we have a 0-dimensional label tensor then there is nothing to ignore;
        # pass y_hat and y_true to criterion for proper handling
        if len(y_hat.shape) == 0:
            return self.criterion(y_hat, y_true)

        # Determine which argument contains the `ignore_index`
        if self.ignore_index in y_true:
            y = y_true
        elif self.ignore_index in y_hat:
            y = y_hat
        else:
            # Return if there is no `ignore_index` present to avoid
            # indexing and unnecessarily removing a dimension
            return self.criterion(y_hat, y_true)

        # Create a boolean tensor of shape y.shape that is True where entries
        # are labeled (i.e. not equal to `ignore_index`)
        labeled_indexes = y != self.ignore_index

        if len(y.shape) > 1:
            # If y is multidimensional, then we drop all samples where `ignore_index`
            # is present along the last dimension
            labeled_indexes = labeled_indexes.all(dim=-1)

        # Get the values by index that are not `ignore_index`
        y_hat_ = y_hat[labeled_indexes]
        y_true_ = y_true[labeled_indexes]

        return self.criterion(y_hat_, y_true_)
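A final illustrative sketch: when the targets are multi-dimensional, any sample whose last-dimension entries contain the `ignore_index` is dropped before the wrapped criterion is applied.

    >>> ssml_loss = IgnoreUnlabeledWrapper(criterion=torch.nn.MSELoss())
    >>> y_hat = torch.tensor([[0.2, 0.8], [0.5, 0.5], [0.9, 0.1]])
    >>> y_true = torch.tensor([[0.2, 0.8], [np.NINF, np.NINF], [0.9, 0.1]])
    >>> ssml_loss(y_hat, y_true)   # the unlabeled middle sample is dropped before MSELoss
    tensor(0.)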