aac_datasets.datasets.base module

class AACDataset(
raw_data: Dict[str, List[Any]] | None = None,
transform: Callable[[ItemType], Any] | None = None,
column_names: Iterable[str] | None = None,
flat_captions: bool = False,
sr: int | None = None,
verbose: int = 0,
)[source]

Bases: Generic[ItemType], Dataset[ItemType]

Base class for AAC datasets.

add_online_column(
column: str,
load_fn: Callable[[Any, int], Any],
allow_replace: bool = False,
) None[source]

Add a new post-processed column to this dataset.

add_online_columns(
post_columns_fns: Dict[str, Callable[[Any, int], Any]],
allow_replace: bool = False,
) None[source]

Add several new post-processed columns to this dataset.

add_raw_column(
column: str,
column_data: List[Any],
allow_replace: bool = False,
) None[source]

Add a new raw column to this dataset.

property all_columns: List[str]

The names of all columns of the dataset.

at(
index: int,
) ItemType[source]
at(
index: Iterable[int] | Iterable[bool] | slice | None,
column: str,
) List
at(
index: Iterable[int] | Iterable[bool] | slice | None,
) Dict[str, List]
at(
index: Iterable[int] | Iterable[bool] | slice | None,
column: Iterable[str] | None,
) Dict[str, List]
at(
index: int | Iterable[int] | Iterable[bool] | Tensor | slice | None,
column: str | Iterable[str] | None,
) Any

Get a specific data field.

Parameters:
  • index – The index or slice of the value in range [0, len(dataset)-1].

  • column – The name(s) of the column(s). Can be any value of columns().

Returns:

The field value. The type depends on the column.

property column_names: List[str]

The names of all selected columns of the dataset.

property flat_captions: bool

Returns True if captions have been flattened.

has_column(
column: str,
) bool[source]

Returns True if column name exists in data.

has_post_column(
column: str,
) bool[source]

Returns True if column name exists in post processed data.

has_raw_column(
column: str,
) bool[source]

Returns True if column name exists in raw data.

static new_empty() AACDataset[source]

Create a new empty dataset.

property num_columns: int

Number of columns in the dataset.

property num_rows: int

Number of rows in the dataset (same as len(dataset)).

preload_online_column(
column: str,
allow_replace: bool = False,
) Callable[[Any, int], Any][source]

Load all data of a post-processed column into raw data.

property raw_data: Dict[str, List[Any]]
remove_column(
column: str,
) List[Any] | Callable[source]

Removes a column from this dataset.

rename_column(
old_column: str,
new_column: str,
allow_replace: bool = False,
) None[source]

Renames a column from this dataset.

property shape: Tuple[int, int]

Shape of the dataset (number of columns, number of rows).

property sr: int | None
property transform: Callable | None
property verbose: int