Skip to main content
torch.js has not been released yet.
torch.js
PlaygroundContact
Login
Documentation
Introduction · Type Safety · Tensor Expressions · Tensor Indexing · Einsum · Einops · Autograd · Training a Model · Profiling & Memory · PyTorch Migration · Best Practices · Runtimes · Performance · PyTorch Compatibility · Benchmarks · DType Coverage
torch.js· 2026
Legal: Terms of Use · Privacy Policy
/
/
  1. docs
  2. Spark
  3. spark
  4. SparkDatasetManifest

spark.SparkDatasetManifest

/**
 * Shape of the `torch.json` manifest file for a dataset, a model, or a
 * project.
 */
export interface SparkDatasetManifest {
  /** Name of the project. */
  name: string;
  /** Version of the project, expressed with semantic versioning. */
  version?: string;
  /** Description intended for human readers. */
  description?: string;
  /** Author of the project; usually a torchjs.org username. */
  author: string;
  /** Kind of project: "dataset", "model", or "project". */
  type?: 'dataset' | 'model' | 'project';
  /**
   * Configuration of the dataset.
   *
   * Required when the project type is dataset or project.
   */
  dataset?: {
    /** Format of the data, e.g. "text" or "image". */
    format?: string;
    /**
     * Splits of the dataset (train, test, val, etc.), keyed by split name.
     *
     * - Image classification: provide the `images` and `labels` file paths.
     * - Text: provide a single `file` path.
     */
    splits: Record<
      string,
      {
        /** Path of the single data file (text datasets). */
        file?: string;
        /** Path of the images file (image classification). */
        images?: string;
        /** Path of the labels file (image classification). */
        labels?: string;
        /** Sample count for this split. */
        samples?: number;
        /** Token count for this split (language models). */
        tokens?: number;
      }
    >;
    /** Dimensions of each image as [height, width] (image datasets). */
    image_size?: [number, number];
    /** Class count (classification). */
    num_classes?: number;
    /** Data type, e.g. "float32" or "uint8". */
    dtype?: string;
    /** Location of the vocabulary file (language models). */
    vocabulary?: string;
    /** Kind of tokenizer, e.g. "char" or "bpe". */
    tokenizer?: string;
  };
  /**
   * Configuration of the model.
   *
   * Required when the project type is model or project.
   */
  model?: {
    /** Name of the model architecture. */
    architecture?: string;
    /** Location of the model weights file. */
    file: string;
    /** Location of the model configuration file. */
    config?: string;
    /** Parameter count. */
    parameters?: number;
    /** Data type of the weights. */
    dtype?: string;
  };
  /**
   * Training configuration and results.
   *
   * Metadata recording how the model was trained.
   */
  training?: {
    /** How many epochs the model was trained for. */
    epochs?: number;
    /** Batch size that was used while training. */
    batch_size?: number;
    /** Optimizer algorithm. */
    optimizer?: string;
    /** Learning rate. */
    learning_rate?: number;
    /** Accuracy reached at the end of training. */
    final_accuracy?: number;
  };
  /**
   * Manifest of files with checksums.
   *
   * Each filename maps to that file's size and SHA256 hash.
   */
  files?: Record<
    string,
    {
      /** Size of the file in bytes. */
      size: number;
      /** SHA256 hash of the file's contents. */
      sha256?: string;
    }
  >;
}
name(string)
– Project name
version(string)optional
– Project version (semantic versioning)
description(string)optional
– Human-readable description
author(string)
– Project author (usually torchjs.org username)
type('dataset' | 'model' | 'project')optional
– Project type: "dataset", "model", or "project"
dataset({ /** Data format: "text", "image", etc. */ format?: string; /** * Dataset splits (train, test, val, etc.) * * For image classification: specify `images` and `labels` file paths * For text: specify single `file` path */ splits: { [split: string]: { /** Single file path (for text datasets) */ file?: string; /** Images file path (for image classification) */ images?: string; /** Labels file path (for image classification) */ labels?: string; /** Number of samples in this split */ samples?: number; /** Number of tokens (for language models) */ tokens?: number; }; }; /** Image dimensions [height, width] (for image datasets) */ image_size?: [number, number]; /** Number of classes (for classification) */ num_classes?: number; /** Data type: "float32", "uint8", etc. */ dtype?: string; /** Path to vocabulary file (for language models) */ vocabulary?: string; /** Tokenizer type: "char", "bpe", etc. */ tokenizer?: string; })optional
– Dataset configuration. Required for dataset and project types.
model({ /** Model architecture name */ architecture?: string; /** Path to model weights file */ file: string; /** Path to model configuration file */ config?: string; /** Number of parameters */ parameters?: number; /** Data type of weights */ dtype?: string; })optional
– Model configuration. Required for model and project types.
training({ /** Number of training epochs */ epochs?: number; /** Batch size used during training */ batch_size?: number; /** Optimizer algorithm */ optimizer?: string; /** Learning rate */ learning_rate?: number; /** Final accuracy achieved */ final_accuracy?: number; })optional
– Training configuration and results. Metadata about how the model was trained.
files({ [filename: string]: { /** File size in bytes */ size: number; /** SHA256 hash of file contents */ sha256?: string; }; })optional
– File manifest with checksums. Maps filenames to their size and SHA256 hash.

Describes the `torch.json` manifest file for a dataset, model, or project.

This file describes the structure and metadata of a project on torchjs.org. It can describe a dataset-only project, a model-only project, or a full training project.

Examples

{
  "name": "mnist",
  "version": "1.0.0",
  "description": "Handwritten digits dataset",
  "author": "kasumi",
  "type": "dataset",
  "dataset": {
    "splits": {
      "train": {
        "images": "data/train-images.bin",
        "labels": "data/train-labels.bin",
        "samples": 60000
      },
      "test": {
        "images": "data/test-images.bin",
        "labels": "data/test-labels.bin",
        "samples": 10000
      }
    },
    "image_size": [28, 28],
    "num_classes": 10,
    "dtype": "uint8"
  }
}
Previous
SparkDataset
Next
SparkDatasetSplit