aac_datasets.datasets.base module

class AACDataset(
raw_data: dict[str, list[Any]] | None = None,
transform: Callable[[ItemType], Any] | None = None,
column_names: Iterable[str] | None = None,
flat_captions: bool = False,
sr: int | Iterable[int] | None = None,
verbose: int = 0,
)[source]

Bases: Generic[ItemType], Dataset[ItemType]

Base class for AAC datasets.

add_online_column(
column: str,
load_fn: Callable[[Any, int], Any],
allow_replace: bool = False,
) None[source]

Add a new post-processed column to this dataset.

add_online_columns(
post_columns_fns: dict[str, Callable[[Any, int], Any]],
allow_replace: bool = False,
) None[source]

Add several new post-processed columns to this dataset.

add_raw_column(
column_name: str,
column_data: list[Any],
allow_replace: bool = False,
) None[source]

Add a new raw column to this dataset.

property all_columns : list[str]

The names of all columns of the dataset.

at(
*args,
**kwargs,
) Any[source]

Deprecated: Use get_item method instead.

property column_names : list[str]

The names of all selected columns of the dataset.

property flat_captions : bool

Returns True if captions have been flattened.

get_item(
index: int,
) ItemType[source]
get_item(
index: Iterable[int] | Iterable[bool] | slice | None,
column: str,
) list
get_item(
index: Iterable[int] | Iterable[bool] | slice | None,
column: Iterable[str] | None = None,
) dict[str, list]
get_item(
index: int | Iterable[int] | Iterable[bool] | Tensor | slice | None,
column: str | Iterable[str] | None,
) Any

Get a specific data field.

Parameters:
index: int
index: Iterable[int] | Iterable[bool] | slice | None
index: int | Iterable[int] | Iterable[bool] | Tensor | slice | None

The index or slice of the value in range [0, len(dataset)-1].

column: str
column: Iterable[str] | None = None
column: str | Iterable[str] | None

The name(s) of the column. Can be any value of columns().

Returns:

The field value. The type depends on the column.

has_column(
column: str,
) bool[source]

Returns True if column name exists in data.

has_post_column(
column: str,
) bool[source]

Returns True if column name exists in post processed data.

has_raw_column(
column: str,
) bool[source]

Returns True if column name exists in raw data.

static new_empty() AACDataset[source]

Create a new empty dataset.

property num_columns : int

Number of columns in the dataset.

property num_rows : int

Number of rows in the dataset (same as len()).

preload_online_column(
column: str,
allow_replace: bool = False,
) Callable[[Any, int], Any][source]

Load all data from a post-processed column into raw data.

property raw_data : dict[str, list[Any]]
remove_column(
column: str,
) list[Any] | Callable[source]

Removes a column from this dataset.

rename_column(
old_column: str,
new_column: str,
allow_replace: bool = False,
) None[source]

Renames a column from this dataset.

property shape : tuple[int, int]

Shape of the dataset (number of columns, number of rows).

property sr : int | list[int] | None
to_dict(
load_online_values: bool = False,
) dict[str, list[Any]][source]

Convert dataset to dictionary.

Parameters:
load_online_values: bool = False

If True, load ALL online values (e.g. audio waveform). Otherwise load only the raw data of the dataset. Defaults to False.

to_hf_dataset(
load_online_values: bool = False,
) Dataset[source]
to_list(
load_online_values: bool = False,
) list[ItemType][source]

Convert dataset to list.

Parameters:
load_online_values: bool = False

If True, load ALL online values (e.g. audio waveform). Otherwise load only the raw data of the dataset. Defaults to False.

property transform : Callable | None
property verbose : int