torch.nn.Parameter
A Parameter is a special Tensor that represents a learnable weight or bias in a neural network.
Parameters are the core of trainable neural networks. When you assign a Parameter to a Module,
it is automatically registered and included in module.parameters(), making it optimizable.
Essential for:
- Creating learnable weights in linear layers, convolutions, embeddings
- Defining trainable biases for layers
- Managing learnable scales and shifts in normalization layers
- Any tensor that should be optimized during training
Key differences from regular Tensors:
- Automatically tracked by parent Module's parameter list
- requires_grad=true by default (unlike regular tensors)
- Automatically optimized by optimizers
- Included in state_dict() for model persistence
- Gradients accumulated during backward pass
When to use Parameter vs Tensor:
- Use Parameter: For weights, biases, and learnable parameters
- Use Tensor: For inputs, activations, intermediate computations
- Use Buffer: For non-learnable state like batch norm running statistics
- Automatic registration: Parameters assigned as module attributes are automatically registered
- Gradient tracking: Parameters have requires_grad=true by default, enabling gradient computation
- Optimizer integration: Optimizers iterate over module.parameters() to update Parameter values
- Persistence: Parameters are included in module.state_dict() for saving/loading models
- Initialization: Consider using torch.nn.init functions to properly initialize Parameters
- Type conversion: Use .to(dtype) to change parameter dtype without losing the Parameter type
- No in-place mutations: Avoid in-place operations on parameters during the forward pass for clean gradients
Examples
// Creating a learnable weight matrix for a custom layer
// A hand-rolled linear layer: y = x @ W^T + b, with W and b as learnable Parameters.
class MyLinear extends torch.nn.Module {
  weight: torch.nn.Parameter;
  bias: torch.nn.Parameter;

  constructor(in_features: number, out_features: number) {
    super();
    // Learnable weight: [out_features, in_features], drawn from a standard normal.
    this.weight = torch.nn.Parameter.create(
      torch.randn([out_features, in_features]),
      { requires_grad: true }
    );
    // Learnable bias: [out_features], zero-initialized.
    this.bias = torch.nn.Parameter.create(
      torch.zeros([out_features]),
      { requires_grad: true }
    );
    // Make both visible to module.parameters() / state_dict().
    this.register_parameter('weight', this.weight);
    this.register_parameter('bias', this.bias);
  }

  // x: [batch, in_features] -> returns [batch, out_features]
  forward(x: torch.Tensor): torch.Tensor {
    const projected = torch.matmul(x, this.weight.t());
    return projected.add(this.bias);
  }
}
const layer = new MyLinear(10, 5);
const x = torch.randn([32, 10]);
const y = layer.forward(x); // [32, 5]

// Creating parameters with a specific dtype
const weight_float32 = torch.nn.Parameter.create(
  torch.randn([64, 128]),
  { dtype: 'float32', requires_grad: true }
);
const weight_int32 = torch.nn.Parameter.create(
  [1, 2, 3, 4],
  { dtype: 'int32', requires_grad: false }
);

// Converting an existing tensor to a parameter
const existing_tensor = torch.randn([32, 32]);
const param = torch.nn.Parameter.create(existing_tensor, { requires_grad: true });

// Accessing parameter values
console.log(param.shape); // [32, 32]
console.log(param.dtype); // 'float32' (presumably torch.randn's default dtype — confirm)
console.log(param.requires_grad); // true (as set at creation; gradients are computed during backward)

// Parameters in ResNet-style residual block
// Residual block: two conv+batchnorm stages with a skip connection.
// Every conv/bn submodule owns Parameters that are picked up by module.parameters().
class ResNetBlock extends torch.nn.Module {
  conv1: torch.nn.Conv2d;
  bn1: torch.nn.BatchNorm2d;
  conv2: torch.nn.Conv2d;
  bn2: torch.nn.BatchNorm2d;

  constructor(channels: number) {
    super();
    // Same channel count in and out so the skip connection adds cleanly.
    this.conv1 = new torch.nn.Conv2d(channels, channels, 3, { padding: 1 });
    this.bn1 = new torch.nn.BatchNorm2d(channels);
    this.conv2 = new torch.nn.Conv2d(channels, channels, 3, { padding: 1 });
    this.bn2 = new torch.nn.BatchNorm2d(channels);
  }

  forward(x: torch.Tensor): torch.Tensor {
    const skip = x; // identity path
    let h = this.bn1.forward(this.conv1.forward(x));
    h = torch.nn.functional.relu(h);
    h = this.bn2.forward(this.conv2.forward(h));
    // Add the identity path, then apply the final nonlinearity.
    return torch.nn.functional.relu(h.add(skip));
  }
}