torch.optim.AdafactorOptions
Options for the Adafactor optimizer.
export interface AdafactorOptions {
/** Learning rate. If null, uses relative step size (default: null) */
lr?: number | null;
/** Regularization constants for the squared gradient and the parameter scale, respectively (default: [1e-30, 1e-3]) */
eps?: [number, number];
/** Clip threshold for update magnitude (default: 1.0) */
clip_threshold?: number;
/** Coefficient for running average of squared gradient (default: -0.8) */
decay_rate?: number;
/** Coefficient for running average of gradient (default: null, no momentum) */
beta1?: number | null;
/** Weight decay (default: 0.0) */
weight_decay?: number;
/** If true, the learning rate is scaled by the root mean square of the parameter (default: true) */
scale_parameter?: boolean;
/** If true, a time-dependent relative step size is computed instead of using an external learning rate (default: true) */
relative_step?: boolean;
/** Warmup initialization for relative step sizes (default: false) */
warmup_init?: boolean;
}

lr (number | null), optional – Learning rate. If null, uses a relative step size (default: null)
eps ([number, number]), optional – Regularization constants for the squared gradient and the parameter scale, respectively (default: [1e-30, 1e-3])
clip_threshold (number), optional – Clip threshold for update magnitude (default: 1.0)
decay_rate (number), optional – Coefficient for running average of squared gradient (default: -0.8)
beta1 (number | null), optional – Coefficient for running average of gradient; null disables momentum (default: null)
weight_decay (number), optional – Weight decay (default: 0.0)
scale_parameter (boolean), optional – If true, the learning rate is scaled by the root mean square of the parameter (default: true)
relative_step (boolean), optional – If true, a time-dependent relative step size is computed instead of using an external learning rate (default: true)
warmup_init (boolean), optional – Warmup initialization for relative step sizes (default: false)
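A minimal usage sketch follows. It assumes this binding exposes a torch.optim.Adafactor constructor that takes a parameter iterable and an AdafactorOptions object; the module path "torch", that constructor signature, and the `model` object are illustrative assumptions, not part of this page. Only the AdafactorOptions shape above is taken from the documentation.

// Usage sketch only; see the assumptions stated above.
import * as torch from "torch";                    // assumed module path
import type { AdafactorOptions } from "torch";     // assumed export of the interface above

// Assume `model` is an existing module-like object exposing parameters().
declare const model: { parameters(): Iterable<unknown> };

// Default behavior: no external lr; a relative, time-dependent step size is used.
const withRelativeStep: AdafactorOptions = {};

// Fixed external learning rate: disable the relative-step schedule and
// parameter scaling so the supplied lr is applied directly.
const withFixedLr: AdafactorOptions = {
  lr: 1e-3,
  relative_step: false,
  scale_parameter: false,
  warmup_init: false,
};

// Assumed constructor: torch.optim.Adafactor(params, options).
const optimizer = new torch.optim.Adafactor(model.parameters(), withFixedLr);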