#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import json
import logging
import os
import os.path as osp
import subprocess
import zipfile
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, get_args
import pythonwrench as pw
import tqdm
from huggingface_hub import snapshot_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from huggingface_hub.utils.tqdm import (
are_progress_bars_disabled,
disable_progress_bars,
enable_progress_bars,
)
from torchwrench.hub.download import download_file, safe_rmdir
from typing_extensions import Literal, TypedDict
from aac_datasets.datasets.functional.common import DatasetCard, LinkInfo
from aac_datasets.utils.globals import _get_root, _get_zip_path
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Names of the WavCaps data sources. The same strings are used as directory names
# under "json_files" and "Zip_files" when building download ignore patterns.
WavCapsSource = Literal["AudioSet_SL", "BBC_Sound_Effects", "FreeSound", "SoundBible"]
# Subset names accepted by the load/download functions of this module.
# The two "*_no_*" names are the "audioset" / "freesound" subsets filtered with a
# blacklist CSV file.
WavCapsSubset = Literal[
"audioset",
"bbc",
"freesound",
"soundbible",
"audioset_no_audiocaps_v1",
"freesound_no_clotho_v2",
]
[docs]
class WavCapsCard(DatasetCard):
    """Static metadata (names, sizes, citation, defaults) describing WavCaps."""

    ANNOTATIONS_CREATORS: Tuple[str, ...] = ("machine-generated",)
    # Every subset provides exactly one caption per audio file.
    CAPTIONS_PER_AUDIO: Dict[WavCapsSubset, int] = dict.fromkeys(
        get_args(WavCapsSubset), 1
    )
    CITATION: str = r"""
@article{mei2023WavCaps,
title = {Wav{C}aps: A {ChatGPT}-Assisted Weakly-Labelled Audio Captioning Dataset for Audio-Language Multimodal Research},
author = {Xinhao Mei and Chutong Meng and Haohe Liu and Qiuqiang Kong and Tom Ko and Chengqi Zhao and Mark D. Plumbley and Yuexian Zou and Wenwu Wang},
year = 2023,
journal = {arXiv preprint arXiv:2303.17395},
url = {https://arxiv.org/pdf/2303.17395.pdf}
}
"""
    # Default revision identifier for the HuggingFace repository.
    DEFAULT_REVISION: str = "85a0c21e26fa7696a5a74ce54fada99a9b43c6de"
    DEFAULT_SUBSET: WavCapsSubset = "audioset_no_audiocaps_v1"
    DESCRIPTION: str = "WavCaps: A ChatGPT-Assisted Weakly-Labelled Audio Captioning Dataset for Audio-Language Multimodal Research."
    # Number of audio files expected in each source directory once prepared.
    EXPECTED_SIZES: Dict[WavCapsSource, int] = {
        "AudioSet_SL": 108317,
        "BBC_Sound_Effects": 31201,
        "FreeSound": 262300,
        # note: 1232 according to github+hf, but found 1320
        # => seems that archive contains more data than in json
        "SoundBible": 1320,
    }
    HOMEPAGE: str = "https://huggingface.co/datasets/cvssp/WavCaps"
    LANGUAGE: Tuple[str, ...] = ("en",)
    LANGUAGE_DETAILS: Tuple[str, ...] = ("en-US",)
    NAME: str = "wavcaps"
    PRETTY_NAME: str = "WavCaps"
    REPO_ID: str = "cvssp/WavCaps"
    # Tuples of every valid source / subset name, derived from the Literal types.
    SOURCES: Tuple[WavCapsSource, ...] = get_args(WavCapsSource)
    SUBSETS: Tuple[WavCapsSubset, ...] = get_args(WavCapsSubset)
    SAMPLE_RATE: int = 32_000  # Hz
    SIZE_CATEGORIES: Tuple[str, ...] = ("100K<n<1M",)
    TASK_CATEGORIES: Tuple[str, ...] = ("audio-to-text", "text-to-audio")
[docs]
def load_wavcaps_dataset(
    # Common args
    root: Union[str, Path, None] = None,
    subset: WavCapsSubset = WavCapsCard.DEFAULT_SUBSET,
    verbose: int = 0,
    *,
    # WavCaps-specific args
    hf_cache_dir: Optional[str] = None,
    revision: Optional[str] = None,
) -> Dict[str, List[Any]]:
    """Load WavCaps metadata.

    For the filtered subsets ("audioset_no_audiocaps_v1" and "freesound_no_clotho_v2"),
    the base subset ("audioset" or "freesound") is loaded and every item whose id is
    listed in the corresponding blacklist CSV file is removed.

    :param root: Dataset root directory. None is resolved by ``_get_root``.
        defaults to None.
    :param subset: The subset of WavCaps to use. Can be one of :attr:`~WavCapsCard.SUBSETS`.
        Deprecated subset names are still accepted and mapped to their new name.
        defaults to "audioset_no_audiocaps_v1".
    :param verbose: Verbose level.
        defaults to 0.
    :param hf_cache_dir: Optional override for HuggingFace cache directory path.
        defaults to None.
    :param revision: Optional override for revision commit/name for HuggingFace repository.
        defaults to None.
    :returns: A dictionary of lists containing each metadata column.
    :raises ValueError: If subset is not a valid subset name.
    :raises RuntimeError: If the dataset is not prepared in root.
    """
    if subset in _WAVCAPS_OLD_SUBSETS_NAMES:
        new_subset = _WAVCAPS_OLD_SUBSETS_NAMES[subset]
        if verbose >= 0:
            msg = f"Deprecated subset name '{subset}', use '{new_subset}' instead."
            logger.warning(msg)
        subset = new_subset

    root = _get_root(root)

    if subset not in WavCapsCard.SUBSETS:
        msg = f"Invalid argument {subset=}. (expected one of {WavCapsCard.SUBSETS})"
        raise ValueError(msg)

    # Warn about evaluation data leaking into training data for unfiltered subsets.
    if subset == "audioset":
        overlapped_ds = "AudioCaps (v1 and v2)"
        overlapped_subsets = ("val", "test")
        recommanded = "audioset_no_audiocaps_v1"
        msg = (
            f"You selected WavCaps subset '{subset}', be careful to not use these data as training when evaluating on {overlapped_ds} {overlapped_subsets} subsets. "
            f"You can use {recommanded} subset for to avoid this bias with {overlapped_ds}."
        )
        logger.warning(msg)
    elif subset == "freesound":
        overlapped_ds = "Clotho"
        overlapped_subsets = (
            "val",
            "eval",
            "dcase_aac_test",
            "dcase_aac_analysis",
            "dcase_t2a_audio",
            "dcase_t2a_captions",
        )
        recommanded = "freesound_no_clotho_v2"
        msg = (
            f"You selected WavCaps subset '{subset}', be careful to not use these data as training when evaluating on {overlapped_ds} {overlapped_subsets} subsets. "
            f"You can use {recommanded} subset for to avoid this bias for Clotho val, eval, dcase_t2a_audio and dcase_t2a_captions subsets. Data could still overlap with Clotho dcase_aac_test and dcase_aac_analysis subsets."
        )
        logger.warning(msg)

    # Filtered subset name -> (base subset to load, key of the blacklist in _WAVCAPS_LINKS).
    filtered_subsets = {
        "audioset_no_audiocaps_v1": ("audioset", "blacklist_audiocaps"),
        "freesound_no_clotho_v2": ("freesound", "blacklist_clotho_v2"),
    }
    if subset in filtered_subsets:
        target_subset, blacklist_name = filtered_subsets[subset]
    else:
        target_subset, blacklist_name = subset, None

    raw_data = _load_wavcaps_dataset_impl(
        root=root,
        subset=target_subset,
        verbose=verbose,
        hf_cache_dir=hf_cache_dir,
        revision=revision,
    )
    if blacklist_name is None:
        return raw_data

    csv_fname = _WAVCAPS_LINKS[blacklist_name]["fname"]
    wavcaps_root = _get_wavcaps_root(root, hf_cache_dir, revision)
    csv_fpath = Path(wavcaps_root).joinpath(csv_fname)

    # newline="" is the mode recommended by the csv module for reading files.
    with open(csv_fpath, "r", newline="", encoding="utf-8") as file:
        blacklisted_ids = {row["id"] for row in csv.DictReader(file)}

    wavcaps_ids = raw_data["id"]
    indexes = [
        i for i, wc_id in enumerate(wavcaps_ids) if wc_id not in blacklisted_ids
    ]

    if verbose >= 1:
        msg = f"Getting {len(indexes)}/{len(wavcaps_ids)} items from '{target_subset}' for subset '{subset}'."
        logger.info(msg)

    # Keep the same rows in every column.
    return {
        column: [column_data[index] for index in indexes]
        for column, column_data in raw_data.items()
    }
[docs]
def download_wavcaps_dataset(
    # Common args
    root: Union[str, Path, None] = None,
    subset: WavCapsSubset = WavCapsCard.DEFAULT_SUBSET,
    force: bool = False,
    verbose: int = 0,
    verify_files: bool = False,
    *,
    # WavCaps-specific args
    clean_archives: bool = False,
    hf_cache_dir: Optional[str] = None,
    repo_id: Optional[str] = None,
    revision: Optional[str] = None,
    zip_path: Union[str, Path, None] = None,
) -> None:
    """Prepare WavCaps data.

    Downloads the HuggingFace snapshot restricted to the sources needed by ``subset``,
    links it as ``<root>/WavCaps``, merges split ZIP archives with the zip executable
    and extracts the ``.flac`` files directly into the audio directory of each source.
    For filtered subsets, the base subset is prepared and the blacklist CSV file is
    downloaded into the WavCaps root.

    :param root: Dataset root directory. None is resolved by ``_get_root``.
        defaults to None.
    :param subset: The subset of WavCaps to use. Can be one of :attr:`~WavCapsCard.SUBSETS`.
        defaults to "audioset_no_audiocaps_v1".
    :param force: If True, force to download again all files.
        defaults to False.
    :param verbose: Verbose level.
        defaults to 0.
    :param verify_files: If True, check that every audio file listed in the archive is present after extraction.
        defaults to False.
    :param clean_archives: If True, remove the compressed archives from disk to save space.
        defaults to False.
    :param hf_cache_dir: Optional override for HuggingFace cache directory path.
        defaults to None.
    :param repo_id: Repository ID on HuggingFace. None means :attr:`~WavCapsCard.REPO_ID`.
        defaults to None.
    :param revision: Optional override for revision commit/name for HuggingFace repository.
        defaults to None.
    :param zip_path: Path to zip executable path in shell. None is resolved by ``_get_zip_path``.
        defaults to None.
    :raises ValueError: If subset is not a valid subset name.
    :raises RuntimeError: If the WavCaps root already exists and is not a symlink.
    :raises FileNotFoundError: If verify_files is True and extracted files are missing.
    """
    if subset in _WAVCAPS_OLD_SUBSETS_NAMES:
        new_subset = _WAVCAPS_OLD_SUBSETS_NAMES[subset]
        if verbose >= 0:
            msg = f"Deprecated subset name '{subset}', use '{new_subset}' instead."
            logger.warning(msg)
        subset = new_subset

    root = _get_root(root)
    zip_path = _get_zip_path(zip_path)

    # Filtered subset name -> (base subset to download, key of the blacklist in _WAVCAPS_LINKS).
    filtered_subsets = {
        "audioset_no_audiocaps_v1": ("audioset", "blacklist_audiocaps"),
        "freesound_no_clotho_v2": ("freesound", "blacklist_clotho_v2"),
    }
    if subset in filtered_subsets:
        base_subset, blacklist_name = filtered_subsets[subset]
        # Prepare the base subset first: the blacklist is written under the WavCaps
        # root, which is a symlink created while preparing the base subset.
        download_wavcaps_dataset(
            root=root,
            subset=base_subset,
            force=force,
            verbose=verbose,
            verify_files=verify_files,
            clean_archives=clean_archives,
            hf_cache_dir=hf_cache_dir,
            repo_id=repo_id,
            revision=revision,
            zip_path=zip_path,
        )
        _download_blacklist(
            root, hf_cache_dir, revision, blacklist_name, verbose=verbose
        )
        return None

    if subset not in WavCapsCard.SUBSETS:
        msg = f"Invalid argument {subset=}. (expected one of {WavCapsCard.SUBSETS})"
        raise ValueError(msg)

    # note: verbose=-1 to disable warning triggered when dset is not prepared
    if not force and _is_prepared_wavcaps(
        root, hf_cache_dir, revision, subset, verbose=-1
    ):
        return None

    if hf_cache_dir is None:
        hf_cache_dir = HUGGINGFACE_HUB_CACHE
    if repo_id is None:
        repo_id = WavCapsCard.REPO_ID

    # Download files from huggingface, skipping the sources unused by this subset
    ign_sources = [
        source for source in WavCapsCard.SOURCES if not _use_source(source, subset)
    ]
    ign_patterns = [
        pattern
        for source in ign_sources
        for pattern in (f"json_files/{source}/*.json", f"Zip_files/{source}/*")
    ]
    if verbose >= 2:
        logger.debug(f"ign_sources={ign_sources}")
        logger.debug(f"ign_patterns={ign_patterns}")

    # Silence HuggingFace progress bars only when they are currently enabled, and
    # restore them afterwards even if the download fails.
    silence_pbars = verbose <= 0 and not are_progress_bars_disabled()
    if silence_pbars:
        disable_progress_bars()
    try:
        snapshot_dpath = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            revision=revision,
            resume_download=not force,
            local_files_only=False,
            cache_dir=hf_cache_dir,
            allow_patterns=None,
            ignore_patterns=ign_patterns,
        )
    finally:
        if silence_pbars:
            enable_progress_bars()

    snapshot_abs_dpath = osp.abspath(snapshot_dpath)
    wavcaps_root = _get_wavcaps_root(root, hf_cache_dir, revision)
    if verbose >= 2:
        logger.debug(f"snapshot_dpath={snapshot_dpath}")
        logger.debug(f"snapshot_absdpath={snapshot_abs_dpath}")
        logger.debug(f"wavcaps_dpath={wavcaps_root}")

    # Build symlink to hf cache
    if osp.lexists(wavcaps_root):
        if not osp.islink(wavcaps_root):
            raise RuntimeError("WavCaps root exists but it is not a symlink.")
        link_target_abspath = osp.abspath(osp.realpath(wavcaps_root))
        if link_target_abspath != snapshot_abs_dpath:
            logger.error(
                "Target link is not pointing to current snapshot path. It will be automatically replaced."
            )
            os.remove(wavcaps_root)
            os.symlink(snapshot_abs_dpath, wavcaps_root, True)
    else:
        os.symlink(snapshot_abs_dpath, wavcaps_root, True)

    # Source name -> True when its archive is split in several parts to merge.
    is_splitted_per_source: Dict[WavCapsSource, bool] = {
        "AudioSet_SL": True,
        "BBC_Sound_Effects": True,
        "FreeSound": True,
        "SoundBible": False,
    }
    source_and_splitted: Dict[WavCapsSource, bool] = {
        source: is_splitted
        for source, is_splitted in is_splitted_per_source.items()
        if _use_source(source, subset)
    }

    archives_dpath = _get_archives_dpath(root, hf_cache_dir, revision)
    for source, is_splitted in source_and_splitted.items():
        archive_source_dpath = osp.join(archives_dpath, _WAVCAPS_ARCHIVE_DNAMES[source])
        main_zip_fpath = osp.join(archive_source_dpath, f"{source}.zip")
        if is_splitted:
            merged_zip_fpath = osp.join(archive_source_dpath, f"{source}_merged.zip")
        else:
            merged_zip_fpath = main_zip_fpath

        if is_splitted and not osp.isfile(merged_zip_fpath):
            # str() keeps the debug join below valid if zip_path is a Path object.
            cmd = [
                str(zip_path),
                "-FF",
                main_zip_fpath,
                "--out",
                merged_zip_fpath,
            ]
            if verbose >= 2:
                logger.debug(f"Merging ZIP files for {source}...")
                logger.debug(f"Using command: {' '.join(cmd)}")
                stdout = None
                stderr = None
            else:
                stdout = subprocess.DEVNULL
                stderr = subprocess.DEVNULL
            subprocess.check_call(cmd, stdout=stdout, stderr=stderr)

        audio_subset_dpath = _get_audio_subset_dpath(
            root, hf_cache_dir, revision, source
        )
        os.makedirs(audio_subset_dpath, exist_ok=True)

        with zipfile.ZipFile(merged_zip_fpath, "r") as file:
            flac_subnames = [name for name in file.namelist() if name.endswith(".flac")]
            assert len(flac_subnames) > 0, f"No .flac file in {merged_zip_fpath}"
            archive_dname = osp.dirname(flac_subnames[0])
            assert all(
                osp.dirname(name) == archive_dname for name in flac_subnames
            ), f"Audio files are not in a single directory in {merged_zip_fpath}"

            # Files are extracted into src_root, then moved up into audio_subset_dpath.
            src_root = osp.join(audio_subset_dpath, archive_dname)
            src_fnames_found = set(os.listdir(src_root)) if osp.isdir(src_root) else set()
            tgt_fnames_found = set(os.listdir(audio_subset_dpath))

            # Skip files already extracted or already moved by a previous run.
            missing_subnames = [
                subname
                for subname in flac_subnames
                if osp.basename(subname) not in src_fnames_found
                and osp.basename(subname) not in tgt_fnames_found
            ]
            if verbose >= 2:
                logger.debug(
                    f"Extracting {len(missing_subnames)}/{len(flac_subnames)} audio files from {merged_zip_fpath}..."
                )
            file.extractall(audio_subset_dpath, missing_subnames)
            if verbose >= 2:
                logger.debug("Extraction done.")

        src_fnames_found = set(os.listdir(src_root)) if osp.isdir(src_root) else set()
        src_fpaths_to_move = [
            osp.join(audio_subset_dpath, subname)
            for subname in flac_subnames
            if osp.basename(subname) in src_fnames_found
        ]
        if verbose >= 2:
            logger.debug(f"Moving {len(src_fpaths_to_move)} files...")
        for src_fpath in tqdm.tqdm(src_fpaths_to_move):
            tgt_fpath = osp.join(audio_subset_dpath, osp.basename(src_fpath))
            os.rename(src_fpath, tgt_fpath)
        if verbose >= 2:
            logger.debug("Move done.")

        if verify_files:
            tgt_fnames_expected = [osp.basename(subname) for subname in flac_subnames]
            tgt_fnames_found = set(os.listdir(audio_subset_dpath))
            if verbose >= 2:
                logger.debug(f"Checking {len(tgt_fnames_expected)} files...")
            tgt_fnames_invalids = [
                fname for fname in tgt_fnames_expected if fname not in tgt_fnames_found
            ]
            if len(tgt_fnames_invalids) > 0:
                raise FileNotFoundError(
                    f"Found {len(tgt_fnames_invalids)}/{len(tgt_fnames_expected)} invalid files."
                )

        # NOTE(review): presumably removes the now-empty extraction sub-directories
        # while keeping audio_subset_dpath itself; confirm against safe_rmdir docs.
        safe_rmdir(audio_subset_dpath, rm_root=False, error_on_non_empty_dir=True)

    if clean_archives:
        for source in source_and_splitted:
            archive_source_dpath = osp.join(
                archives_dpath, _WAVCAPS_ARCHIVE_DNAMES[source]
            )
            for name in os.listdir(archive_source_dpath):
                # ".z" matches ".zip" files as well as other names containing ".z"
                # (presumably the split archive parts).
                if ".z" not in name:
                    continue
                fpath = osp.join(archive_source_dpath, name)
                if verbose >= 1:
                    logger.info(f"Removing archive file {name} for {source=}...")
                os.remove(fpath)

    return None
[docs]
def download_wavcaps_datasets(
    # Common args
    root: Union[str, Path, None] = None,
    subsets: Union[WavCapsSubset, Iterable[WavCapsSubset]] = WavCapsCard.DEFAULT_SUBSET,
    force: bool = False,
    verbose: int = 0,
    *,
    # WavCaps-specific args
    clean_archives: bool = False,
    hf_cache_dir: Optional[str] = None,
    repo_id: Optional[str] = None,
    revision: Optional[str] = None,
    verify_files: bool = False,
    zip_path: Union[str, Path, None] = None,
) -> None:
    """Function helper to download a list of subsets. See :func:`~aac_datasets.datasets.functional.wavcaps.download_wavcaps_dataset` for details."""
    # A single subset name is a str, which must not be iterated character by character.
    subset_names = [subsets] if isinstance(subsets, str) else list(subsets)

    for subset_name in subset_names:
        download_wavcaps_dataset(
            subset=subset_name,
            root=root,
            force=force,
            verbose=verbose,
            clean_archives=clean_archives,
            hf_cache_dir=hf_cache_dir,
            repo_id=repo_id,
            revision=revision,
            verify_files=verify_files,
            zip_path=zip_path,
        )
def _load_wavcaps_dataset_impl(
    # Common args
    root: str,
    subset: WavCapsSubset,
    verbose: int,
    # WavCaps-specific args
    hf_cache_dir: Optional[str],
    revision: Optional[str],
) -> Dict[str, List[Any]]:
    """Load the JSON metadata of the sources used by ``subset`` as a dict of columns.

    Columns missing from a source are filled with the values of ``_DEFAULT_VALUES``.
    The result also contains a "source" and a "fname" column, "captions" holds a
    one-element list per item, and "duration" values are converted to float.

    :param root: Dataset root directory.
    :param subset: The subset of WavCaps to load.
    :param verbose: Verbose level.
    :param hf_cache_dir: HuggingFace cache directory path, forwarded to path helpers.
    :param revision: Revision commit/name, forwarded to path helpers.
    :returns: A dictionary of lists containing each metadata column.
    :raises RuntimeError: If the dataset is not prepared or a column cannot be filled.
    """
    if not _is_prepared_wavcaps(root, hf_cache_dir, revision, subset, verbose):
        msg = f"{WavCapsCard.PRETTY_NAME} is not prepared in {root=}. Please use download=True to install it in root."
        raise RuntimeError(msg)

    json_dpath = _get_json_dpath(root, hf_cache_dir, revision)
    # Source name (also its JSON directory name) -> JSON metadata file name.
    json_fnames: Dict[WavCapsSource, str] = {
        "AudioSet_SL": "as_final.json",
        "BBC_Sound_Effects": "bbc_final.json",
        "FreeSound": "fsd_final.json",
        "SoundBible": "sb_final.json",
    }

    raw_data: Dict[str, List[Any]] = {
        k: [] for k in _WAVCAPS_RAW_COLUMNS + ("source", "fname")
    }

    for source, json_fname in json_fnames.items():
        if not _use_source(source, subset):
            continue

        json_path = osp.join(json_dpath, source, json_fname)
        if verbose >= 2:
            logger.debug(f"Loading metadata in JSON '{json_path}'...")

        json_data, size = _load_json(json_path)
        json_data.pop("audio", None)

        ids = json_data["id"]
        if source == "AudioSet_SL":
            # AudioSet ids already carry a ".wav" extension to swap.
            fnames = [id_.replace(".wav", ".flac") for id_ in ids]
        else:
            fnames = [f"{id_}.flac" for id_ in ids]
        raw_data["fname"] += fnames

        for k in _WAVCAPS_RAW_COLUMNS:
            if k in json_data:
                raw_data[k] += json_data[k]
            elif k in _DEFAULT_VALUES:
                default_val = _DEFAULT_VALUES[k]
                # Give each item its own copy of a list default, otherwise every
                # item would share (and could mutate) the module-level list.
                raw_data[k] += [
                    list(default_val) if isinstance(default_val, list) else default_val
                    for _ in range(size)
                ]
            elif k in ("audio", "file_name"):
                # These columns are dropped below, no need to fill them.
                pass
            else:
                raise RuntimeError(f"Invalid column name {k}. (with {source=})")

        raw_data["source"] += [source] * size

    raw_data.pop("audio")
    raw_data.pop("file_name")
    captions = raw_data.pop("caption")

    # Convert str -> List[str] for captions to match other datasets captions type
    raw_data["captions"] = [[caption] for caption in captions]

    # Force floating-point precision for duration
    raw_data["duration"] = list(map(float, raw_data["duration"]))
    return raw_data
def _get_wavcaps_root(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
) -> str:
    """Return the path of the "WavCaps" entry located directly under ``root``.

    ``hf_cache_dir`` and ``revision`` are accepted but not used to build the path.
    """
    wavcaps_dname = "WavCaps"
    return osp.join(root, wavcaps_dname)
def _get_json_dpath(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
) -> str:
    """Return the path of the "json_files" directory inside the WavCaps root."""
    wavcaps_root = _get_wavcaps_root(root, hf_cache_dir, revision)
    return osp.join(wavcaps_root, "json_files")
def _get_archives_dpath(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
) -> str:
    """Return the path of the "Zip_files" directory inside the WavCaps root."""
    wavcaps_root = _get_wavcaps_root(root, hf_cache_dir, revision)
    return osp.join(wavcaps_root, "Zip_files")
def _get_audio_dpath(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
) -> str:
    """Return the path of the "Audio" directory inside the WavCaps root."""
    wavcaps_root = _get_wavcaps_root(root, hf_cache_dir, revision)
    return osp.join(wavcaps_root, "Audio")
def _get_audio_subset_dpath(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
    source: WavCapsSource,
) -> str:
    """Return the audio directory of ``source`` inside the WavCaps "Audio" directory.

    The directory name is looked up in ``_WAVCAPS_AUDIO_DNAMES``.
    """
    audio_dpath = _get_audio_dpath(root, hf_cache_dir, revision)
    source_dname = _WAVCAPS_AUDIO_DNAMES[source]
    return osp.join(audio_dpath, source_dname)
def _is_prepared_wavcaps(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
    subset: WavCapsSubset,
    verbose: int,
) -> bool:
    """Return True if every source used by ``subset`` has its expected audio files.

    A source is valid when its audio directory exists and contains exactly
    ``WavCapsCard.EXPECTED_SIZES[source]`` entries. The first problem found is
    logged as an error when ``verbose >= 0`` and False is returned.
    """
    for source in WavCapsCard.SOURCES:
        if not _use_source(source, subset):
            continue

        audio_subset_dpath = _get_audio_subset_dpath(
            root, hf_cache_dir, revision, source
        )
        if not osp.isdir(audio_subset_dpath):
            if verbose >= 0:
                logger.error(f"Cannot find directory {audio_subset_dpath=}.")
            return False

        num_found = len(os.listdir(audio_subset_dpath))
        num_expected = WavCapsCard.EXPECTED_SIZES[source]
        if num_found == num_expected:
            continue

        if verbose >= 0:
            logger.error(
                f"Invalid number of files for {source=}. (expected {num_expected} but found {num_found} files)"
            )
        return False

    return True
def _use_source(source: WavCapsSource, subset: WavCapsSubset) -> bool:
    """Return True if the data of ``source`` is needed by ``subset``."""
    # Each source paired with the subset names built from it.
    subsets_per_source = (
        ("AudioSet_SL", ("audioset", "audioset_no_audiocaps_v1")),
        ("BBC_Sound_Effects", ("bbc",)),
        ("FreeSound", ("freesound", "freesound_no_clotho_v2")),
        ("SoundBible", ("soundbible",)),
    )
    for candidate, accepted_subsets in subsets_per_source:
        if source == candidate and subset in accepted_subsets:
            return True
    return False
def _load_json(fpath: str) -> Tuple[Dict[str, Any], int]:
    """Load a WavCaps JSON metadata file as a dict of columns.

    :param fpath: Path to a JSON file whose top-level "data" entry is a list of items.
    :returns: A tuple (columns, size) where columns is the list of items converted
        with ``pw.list_dict_to_dict_list`` and size is the number of items.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale encoding.
    with open(fpath, "r", encoding="utf-8") as file:
        content = json.load(file)

    items = content["data"]
    columns = pw.list_dict_to_dict_list(items, key_mode="same")
    return columns, len(items)
def _download_blacklist(
    root: str,
    hf_cache_dir: Optional[str],
    revision: Optional[str],
    name: str,
    verbose: int = 0,
) -> None:
    """Download the blacklist file registered as ``name`` in ``_WAVCAPS_LINKS``.

    The file is saved in the WavCaps root under the "fname" of the link entry.
    """
    link_info = _WAVCAPS_LINKS[name]
    target_fname = link_info["fname"]
    source_url = link_info["url"]

    target_fpath = Path(_get_wavcaps_root(root, hf_cache_dir, revision)) / target_fname
    download_file(source_url, target_fpath, verbose=verbose)
class _WavCapsRawItem(TypedDict):
    """Keys of one raw WavCaps metadata item, all sources combined."""

    # Common values
    caption: str
    duration: float
    id: str

    # Source Specific values
    audio: Optional[str]
    author: Optional[str]
    description: Optional[str]
    download_link: Optional[str]
    file_name: Optional[str]
    href: Optional[str]
    tags: Optional[List[str]]
# Values used to fill a column when the JSON metadata of a source does not provide it.
# NOTE(review): "tags" maps to one mutable list object; copy it before handing it
# out per item, otherwise all items would reference the same list.
_DEFAULT_VALUES = {
"author": "",
"description": "",
"download_link": "",
"href": "",
"tags": [],
}
# Names of all keys declared by _WavCapsRawItem.
# NOTE(review): the tuple is built from a union of key sets, so its order is
# presumably not the declaration order; confirm before relying on column order.
_WAVCAPS_RAW_COLUMNS = tuple(
_WavCapsRawItem.__required_keys__ | _WavCapsRawItem.__optional_keys__ # type: ignore
)
_WAVCAPS_AUDIO_DNAMES: Dict[WavCapsSource, str] = {
# Source name to audio directory name (joined to the "Audio" directory)
"AudioSet_SL": "AudioSet_SL",
"BBC_Sound_Effects": "BBC_Sound_Effects",
"FreeSound": "FreeSound",
"SoundBible": "SoundBible",
}
_WAVCAPS_ARCHIVE_DNAMES: Dict[WavCapsSource, str] = {
# Source name to archive directory name (joined to the "Zip_files" directory)
"AudioSet_SL": "AudioSet_SL",
"BBC_Sound_Effects": "BBC_Sound_Effects",
"FreeSound": "FreeSound",
"SoundBible": "SoundBible",
}
# Blacklist name to download URL and local file name.
# Each blacklist is a CSV file with an "id" column listing the WavCaps items to
# exclude from a filtered subset.
_WAVCAPS_LINKS: Dict[str, LinkInfo] = {
"blacklist_audiocaps": {
"url": "https://raw.githubusercontent.com/Labbeti/aac-datasets/main/data/wavcaps/blacklist_audiocaps.full.csv",
"fname": "blacklist_audiocaps.full.csv",
},
"blacklist_clotho": {
"url": "https://raw.githubusercontent.com/Labbeti/aac-datasets/main/data/wavcaps/blacklist_clotho.full.csv",
"fname": "blacklist_clotho.full.csv",
},
"blacklist_clotho_v2": {
"url": "https://raw.githubusercontent.com/Labbeti/aac-datasets/main/data/wavcaps/blacklist_clotho.full.v2.csv",
"fname": "blacklist_clotho.full.v2.csv",
},
}
# Deprecated subset name to current subset name.
# The load/download functions log a deprecation warning and use the new name.
_WAVCAPS_OLD_SUBSETS_NAMES: Dict[str, WavCapsSubset] = {
"fsd": "freesound",
"as": "audioset",
"fsd_nocl": "freesound_no_clotho_v2",
"as_noac": "audioset_no_audiocaps_v1",
"sb": "soundbible",
"audioset_no_audiocaps": "audioset_no_audiocaps_v1",
}