spark.SparkDataset
/**
 * A complete dataset with train/test/val splits.
 *
 * Load via `spark.dataset('username/project')`.
 *
 * @example
 * ```ts
 * const data = await spark.dataset('kasumi/mnist');
 * console.log(data.name);      // "mnist"
 * console.log(data.size.train); // 60000
 * ```
 */
export interface SparkDataset {
  /**
   * Dataset name from manifest.
   *
   * @readonly
   */
  readonly name: string;
  /**
   * Human-readable description.
   *
   * Omitted when the manifest carries no description.
   *
   * @readonly
   */
  readonly description?: string;
  /**
   * Training split (always present).
   *
   * @readonly
   */
  readonly train: SparkDatasetSplit;
  /**
   * Test split (if present in manifest).
   *
   * @readonly
   */
  readonly test?: SparkDatasetSplit;
  /**
   * Validation split (if present in manifest).
   *
   * @readonly
   */
  readonly val?: SparkDatasetSplit;
  /**
   * Size of each split.
   *
   * Maps split name to number of samples
   * (e.g. `size.train`, `size.test`).
   *
   * @readonly
   */
  readonly size: { [split: string]: number };
  /**
   * Raw torch.json manifest.
   *
   * Contains all metadata about the dataset.
   *
   * @readonly
   */
  readonly manifest: SparkDatasetManifest;
}- readonly
`name` (`string`) – Dataset name from manifest.
- readonly `description` (`string`, optional) – Human-readable description.
- readonly `train` (`SparkDatasetSplit`) – Training split (always present).
- readonly `test` (`SparkDatasetSplit`, optional) – Test split (if present in manifest).
- readonly `val` (`SparkDatasetSplit`, optional) – Validation split (if present in manifest).
- readonly `size` (`{ [split: string]: number }`) – Size of each split. Maps split name to number of samples.
- readonly `manifest` (`SparkDatasetManifest`) – Raw torch.json manifest. Contains all metadata about the dataset.
A complete dataset with train/test/val splits.
Load via `spark.dataset('username/project')`.
Examples
// Load a dataset by its 'username/project' identifier, then inspect it
// and iterate the training split in mini-batches.
const data = await spark.dataset('kasumi/mnist');
console.log(data.name); // "mnist"
console.log(data.size.train); // 60000
console.log(data.size.test); // 10000
// batch(64) yields successive batches of up to 64 samples.
for (const batch of data.train.batch(64)) {
// Train on batch
}