spark.SparkDatasetSplit
/**
 * A single split of a dataset (train, test, val, etc.).
 *
 * Provides batching and indexing access to the data.
 */
export interface SparkDatasetSplit {
  /**
   * Number of samples in this split.
   */
  readonly length: number;

  /**
   * Create a batched async iterator over the split.
   *
   * Yields batches of samples. The last batch may be smaller
   * if the number of samples is not divisible by batch size.
   *
   * The result is an `AsyncIterable`, so it must be consumed with
   * `for await (... of ...)` rather than a plain `for...of`.
   *
   * @param batchSize - Number of samples per batch
   * @returns Async iterable yielding batches with `x` and `y` arrays
   */
  batch(batchSize: number): AsyncIterable<{ x: unknown; y: unknown }>;

  /**
   * Get a single sample by index.
   *
   * @param index - Sample index (0-based)
   * @returns Sample with `x` (features) and `y` (label) arrays
   * @throws If index is out of range
   */
  get(index: number): Promise<{ x: unknown; y: unknown }>;
}
length (number, read-only) – Number of samples in this split.
batch ((batchSize: number) => AsyncIterable<{ x: unknown; y: unknown }>) – Create a batched async iterator over the split. Yields batches of samples. The last batch may be smaller if the number of samples is not divisible by batch size.
get ((index: number) => Promise<{ x: unknown; y: unknown }>) – Get a single sample by index.
A single split of a dataset (train, test, val, etc.).
Provides batching and indexing access to the data.
Examples
const data = await spark.dataset('kasumi/mnist');
// Iterate with batching
for await (const { x, y } of data.train.batch(64)) {
// x: Float32Array [64, 784] or Uint8Array [64, 784]
// y: Int32Array [64]
}
// Get single sample
const { x, y } = await data.train.get(0);