torch.nn.functional.MultiHeadAttentionFunctionalOptions
/**
 * Options for the `multi_head_attention_forward` functional operation.
 *
 * All fields are optional; defaults mirror the PyTorch
 * `torch.nn.functional.multi_head_attention_forward` keyword arguments.
 */
export interface MultiHeadAttentionFunctionalOptions {
  /** Whether the module is in training mode. (default: true) */
  training?: boolean;
  /** Mask to exclude keys from attention. (default: null) */
  key_padding_mask?: Tensor | null;
  /** Whether to return the attention weights. (default: true) */
  need_weights?: boolean;
  /** Mask to prevent attention to certain positions. (default: null) */
  attn_mask?: Tensor | null;
  /** Whether to use separate projection weights for Q, K, V. (default: false) */
  use_separate_proj_weight?: boolean;
  /** Separate projection weight for Query. (default: null) */
  q_proj_weight?: Tensor | null;
  /** Separate projection weight for Key. (default: null) */
  k_proj_weight?: Tensor | null;
  /** Separate projection weight for Value. (default: null) */
  v_proj_weight?: Tensor | null;
  /** Static Key for attention. (default: null) */
  static_k?: Tensor | null;
  /** Static Value for attention. (default: null) */
  static_v?: Tensor | null;
  /** Whether to average the attention weights across heads. (default: true) */
  average_attn_weights?: boolean;
  /** If true, applies causal masking. (default: false) */
  is_causal?: boolean;
}
training (boolean, optional) – Whether the module is in training mode. (default: true)
key_padding_mask (Tensor | null, optional) – Mask to exclude keys from attention. (default: null)
need_weights (boolean, optional) – Whether to return the attention weights. (default: true)
attn_mask (Tensor | null, optional) – Mask to prevent attention to certain positions. (default: null)
use_separate_proj_weight (boolean, optional) – Whether to use separate projection weights for Q, K, V. (default: false)
q_proj_weight (Tensor | null, optional) – Separate projection weight for Query. (default: null)
k_proj_weight (Tensor | null, optional) – Separate projection weight for Key. (default: null)
v_proj_weight (Tensor | null, optional) – Separate projection weight for Value. (default: null)
static_k (Tensor | null, optional) – Static Key for attention. (default: null)
static_v (Tensor | null, optional) – Static Value for attention. (default: null)
average_attn_weights (boolean, optional) – Whether to average the attention weights across heads. (default: true)
is_causal (boolean, optional) – If true, applies causal masking. (default: false)
Options for multi_head_attention_forward functional operation.