torch.optim.AdamWOptions

Options for the AdamW optimizer.

export interface AdamWOptions {
/** Learning rate (default: 1e-3) */
lr?: number;
/** Coefficients for computing running averages of gradient and its square (default: [0.9, 0.999]) */
betas?: [number, number];
/** Term added to the denominator to improve numerical stability (default: 1e-8) */
eps?: number;
/** Weight decay coefficient (default: 1e-2) */
weight_decay?: number;
/** Whether to use the AMSGrad variant (default: false) */
amsgrad?: boolean;
/** Whether to use the foreach (multi-tensor) implementation (default: false) */
foreach?: boolean;
/** Maximize the objective with respect to params (default: false) */
maximize?: boolean;
/** Whether this instance is safe to capture in a CUDA graph (default: false) */
capturable?: boolean;
/** Whether autograd should occur through the optimizer step (default: false) */
differentiable?: boolean;
/** Whether to use the fused implementation (default: false) */
fused?: boolean;
}
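As a usage sketch, the snippet below builds an options object and passes it to an AdamW constructor. The module import, the `new torch.optim.AdamW(params, options)` signature, `torch.Tensor`, and `model.parameters()` are assumptions that mirror PyTorch's Python API; they are not confirmed by this interface, which only defines the option fields.

import * as torch from "torch"; // hypothetical module name for this binding

// Placeholder model for the sketch; a real model would come from the binding.
declare const model: { parameters(): torch.Tensor[] };

// Every field is optional; anything omitted falls back to the defaults above.
const options: torch.optim.AdamWOptions = {
  lr: 3e-4,           // override the 1e-3 default
  weight_decay: 1e-2, // decoupled weight decay, the "W" in AdamW
  amsgrad: false,     // use the vanilla AdamW update
};

// Assumed constructor mirroring Python's torch.optim.AdamW(params, **kwargs).
const optimizer = new torch.optim.AdamW(model.parameters(), options);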