Source code for aac_datasets.utils.audioset_mapping

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import csv
import os
import os.path as osp
from pathlib import Path
from typing import Dict, Union

from torch.hub import download_url_to_file

# Metadata files handled by this module: for each entry, the local file name
# used inside the cache directory and the URL it is downloaded from.
_AUDIOSET_INFOS = {
    "class_labels_indices": dict(
        fname="class_labels_indices.csv",
        url="http://storage.googleapis.com/us_audioset/youtube_corpus/v1/csv/class_labels_indices.csv",
    ),
}
# Cache directory used when no explicit cache path is given.
_DEFAULT_CACHE_PATH = Path.home() / ".cache" / "audioset_mapping"


def get_audioset_mapping_cache_path(cache_path: Union[str, Path, None] = None) -> Path:
    """Return the directory where the AudioSet mapping file is cached.

    :param cache_path: Optional custom cache directory. When None, the module default is used.
    :returns: ``cache_path`` converted to a :class:`pathlib.Path`, or the default cache path.
    """
    if cache_path is None:
        return _DEFAULT_CACHE_PATH
    return Path(cache_path)


def download_audioset_mapping(
    cache_path: Union[str, Path, None] = None,
    verbose: int = 0,
) -> None:
    """Download the AudioSet class labels CSV file into the cache directory.

    The cache directory is created when it does not exist yet.

    :param cache_path: Optional custom cache directory. When None, the default cache path is used.
    :param verbose: Verbosity level. A progress bar is requested when it is >= 1.
    """
    target_dir = get_audioset_mapping_cache_path(cache_path)
    os.makedirs(target_dir, exist_ok=True)

    entry = _AUDIOSET_INFOS["class_labels_indices"]
    destination = target_dir / entry["fname"]
    show_progress = verbose >= 1
    download_url_to_file(entry["url"], str(destination), progress=show_progress)


def load_audioset_mapping(
    key_name: str = "index",
    val_name: str = "display_name",
    offline: bool = False,
    cache_path: Union[str, Path, None] = None,
    verbose: int = 0,
) -> Dict:
    """Load a mapping between two columns of the AudioSet class labels CSV file.

    The file is searched in the cache directory. When it is missing, it is
    downloaded first, unless ``offline`` is True.

    :param key_name: Column used as mapping keys. One of "index", "mid" or "display_name".
    :param val_name: Column used as mapping values. One of "index", "mid" or "display_name".
        Must differ from ``key_name``.
    :param offline: If True, never download the file and raise when it is missing.
    :param cache_path: Optional custom cache directory. When None, the default cache path is used.
    :param verbose: Verbosity level forwarded to the download function.
    :returns: A dictionary mapping each ``key_name`` entry to its ``val_name`` entry.
        Values of the "index" column are converted to int, other columns stay str.
    :raises ValueError: If ``key_name`` or ``val_name`` is invalid, or if they are equal.
    :raises FileNotFoundError: If the file is missing and ``offline`` is True.
    """
    NAMES = ("index", "mid", "display_name")
    if key_name not in NAMES:
        raise ValueError(f"Invalid argument {key_name=}. (expected one of {NAMES})")
    if val_name not in NAMES:
        raise ValueError(f"Invalid argument {val_name=}. (expected one of {NAMES})")
    if key_name == val_name:
        raise ValueError(
            f"Invalid arguments key_name={key_name} with {val_name=}. (expected different values)"
        )

    cache_path = get_audioset_mapping_cache_path(cache_path)
    info = _AUDIOSET_INFOS["class_labels_indices"]
    map_fpath = cache_path.joinpath(info["fname"])

    if not osp.isfile(map_fpath):
        if offline:
            msg = f"Cannot find or download audioset mapping file in '{map_fpath}' with mode {offline=}."
            raise FileNotFoundError(msg)
        download_audioset_mapping(cache_path, verbose)

    # newline="" lets the csv module handle line endings itself, as its
    # documentation requires; the explicit encoding avoids depending on the
    # platform's locale default.
    with open(map_fpath, "r", encoding="utf-8", newline="") as file:
        reader = csv.DictReader(file, skipinitialspace=True, strict=True)
        rows = list(reader)

    def _cast(name: str, raw: str) -> Union[int, str]:
        # Only the "index" column holds integers; other columns are kept as text.
        return int(raw) if name == "index" else raw

    return {_cast(key_name, row[key_name]): _cast(val_name, row[val_name]) for row in rows}


def load_audioset_name_to_idx(
    offline: bool = False,
    cache_path: Union[str, Path, None] = None,
    verbose: int = 0,
) -> Dict[str, int]:
    """Load the AudioSet mapping from class display name to class index.

    Shortcut for :func:`load_audioset_mapping` with "display_name" as keys and "index" as values.

    :param offline: If True, never download the mapping file.
    :param cache_path: Optional custom cache directory.
    :param verbose: Verbosity level.
    :returns: A dictionary mapping each display name to its index.
    """
    return load_audioset_mapping(
        key_name="display_name",
        val_name="index",
        offline=offline,
        cache_path=cache_path,
        verbose=verbose,
    )