torch.nn.MultiheadAttnOptions

/**
 * Options for the multihead_attn operation.
 *
 * Every field is optional. The defaults listed below are documentation only:
 * this interface does not apply them itself, so they are presumably filled in
 * by the code that consumes these options (confirm against the implementation).
 */
export interface MultiheadAttnOptions {
  /** Mask to exclude keys from attention. */
  key_padding_mask?: Tensor;
  /**
   * Whether to return the attention weights.
   *
   * @defaultValue `true`
   */
  need_weights?: boolean;
  /** Mask to prevent attention to certain positions. */
  attn_mask?: Tensor;
  /**
   * Whether to average the attention weights across heads.
   *
   * @defaultValue `true`
   */
  average_attn_weights?: boolean;
  /**
   * If true, applies causal masking.
   *
   * @defaultValue `false`
   */
  is_causal?: boolean;
}

key_padding_mask (Tensor, optional) – Mask to exclude keys from attention
need_weights (boolean, optional) – Whether to return the attention weights (default: true)
attn_mask (Tensor, optional) – Mask to prevent attention to certain positions
average_attn_weights (boolean, optional) – Whether to average the attention weights across heads (default: true)
is_causal (boolean, optional) – If true, applies causal masking (default: false)

Options for the multihead_attn operation.

torch.nn.MultiheadAttnOptions

/**
 * Options for the multihead_attn operation.
 *
 * Every field is optional. The defaults listed below are documentation only:
 * this interface does not apply them itself, so they are presumably filled in
 * by the code that consumes these options (confirm against the implementation).
 */
export interface MultiheadAttnOptions {
  /** Mask to exclude keys from attention. */
  key_padding_mask?: Tensor;
  /**
   * Whether to return the attention weights.
   *
   * @defaultValue `true`
   */
  need_weights?: boolean;
  /** Mask to prevent attention to certain positions. */
  attn_mask?: Tensor;
  /**
   * Whether to average the attention weights across heads.
   *
   * @defaultValue `true`
   */
  average_attn_weights?: boolean;
  /**
   * If true, applies causal masking.
   *
   * @defaultValue `false`
   */
  is_causal?: boolean;
}

key_padding_mask (Tensor, optional) – Mask to exclude keys from attention
need_weights (boolean, optional) – Whether to return the attention weights (default: true)
attn_mask (Tensor, optional) – Mask to prevent attention to certain positions
average_attn_weights (boolean, optional) – Whether to average the attention weights across heads (default: true)
is_causal (boolean, optional) – If true, applies causal masking (default: false)

Options for the multihead_attn operation.