torch.nn.functional.MultiHeadAttentionFunctionalOptions
/**
 * Options for the `multi_head_attention_forward` functional operation.
 *
 * All fields are optional; defaults mirror the PyTorch
 * `torch.nn.functional.multi_head_attention_forward` keyword arguments.
 */
export interface MultiHeadAttentionFunctionalOptions {
  /** Whether the module is in training mode. (default: true) */
  training?: boolean;
  /** Mask to exclude keys from attention. (default: null) */
  key_padding_mask?: Tensor | null;
  /** Whether to return the attention weights. (default: true) */
  need_weights?: boolean;
  /** Mask to prevent attention to certain positions. (default: null) */
  attn_mask?: Tensor | null;
  /** Whether to use separate projection weights for Q, K, V. (default: false) */
  use_separate_proj_weight?: boolean;
  /** Separate projection weight for Query. (default: null) */
  q_proj_weight?: Tensor | null;
  /** Separate projection weight for Key. (default: null) */
  k_proj_weight?: Tensor | null;
  /** Separate projection weight for Value. (default: null) */
  v_proj_weight?: Tensor | null;
  /** Static Key for attention. (default: null) */
  static_k?: Tensor | null;
  /** Static Value for attention. (default: null) */
  static_v?: Tensor | null;
  /** Whether to average the attention weights across heads. (default: true) */
  average_attn_weights?: boolean;
  /** If true, applies causal masking. (default: false) */
  is_causal?: boolean;
}
training (boolean, optional) – Whether the module is in training mode. (default: true)
key_padding_mask (Tensor | null, optional) – Mask to exclude keys from attention. (default: null)
need_weights (boolean, optional) – Whether to return the attention weights. (default: true)
attn_mask (Tensor | null, optional) – Mask to prevent attention to certain positions. (default: null)
use_separate_proj_weight (boolean, optional) – Whether to use separate projection weights for Q, K, V. (default: false)
q_proj_weight (Tensor | null, optional) – Separate projection weight for Query. (default: null)
k_proj_weight (Tensor | null, optional) – Separate projection weight for Key. (default: null)
v_proj_weight (Tensor | null, optional) – Separate projection weight for Value. (default: null)
static_k (Tensor | null, optional) – Static Key for attention. (default: null)
static_v (Tensor | null, optional) – Static Value for attention. (default: null)
average_attn_weights (boolean, optional) – Whether to average the attention weights across heads. (default: true)
is_causal (boolean, optional) – If true, applies causal masking. (default: false)
Options for multi_head_attention_forward functional operation.