{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Acoustic feature extraction\n", "\n", "Pykanto is designed to provide a platform that enables researchers to run and\n", "any analyses that they might need with ease and in a reproducible way. The\n", "precise nature of those analyses will vary greatly, so `pykanto`'s aim is not\n", "to provide functions or methods to, for example, extract audio features—there\n", "already are other, much better libraries for that.\n", "\n", "These are some examples that show one way in which you can\n", "extract and store features from vocalisations in a dataset created with pykanto:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [ "hide-output", "hide-input" ] }, "outputs": [], "source": [ "from __future__ import annotations\n", "\n", "from pathlib import Path\n", "\n", "import pkg_resources\n", "\n", "from pykanto.dataset import KantoData\n", "from pykanto.parameters import Parameters\n", "from pykanto.signal.analysis import (\n", " approximate_minmax_frequency,\n", " spec_centroid_bandwidth,\n", ")\n", "from pykanto.signal.segment import segment_files_parallel, ReadWav\n", "from pykanto.utils.custom import (\n", " chipper_units_to_json,\n", " parse_sonic_visualiser_xml,\n", ")\n", "from pykanto.utils.io import get_unit_spectrograms, load_dataset\n", "from pykanto.utils.paths import ProjDirs, get_file_paths, get_wavs_w_annotation\n", "from pykanto.signal.spectrogram import retrieve_spectrogram\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [ "hide-input", "hide-output" ] }, "outputs": [], "source": [ "# ──── SETTINGS ────────────────────────────────────────────────────────────────\n", "\n", "DATASET_ID = \"STORM-PETREL\"\n", "DATA_PATH = Path(pkg_resources.resource_filename(\"pykanto\", \"data\"))\n", "PROJECT = Path(DATA_PATH).parent\n", "RAW_DATA = DATA_PATH / \"raw\" / DATASET_ID\n", "DIRS = ProjDirs(PROJECT, RAW_DATA, DATASET_ID, mkdir=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "tags": [ "hide-input", "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 2 files. They will be processed in 2 chunks of length 1.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c3a72c8345f14239b74ab9930c6557e4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Finding and saving audio segments and their metadata: 0%| | 0/2 [00:00 34\u001b[0m dataset \u001b[39m=\u001b[39m KantoData(\n\u001b[1;32m 35\u001b[0m DIRS,\n\u001b[1;32m 36\u001b[0m parameters\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 37\u001b[0m overwrite_dataset\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m,\n\u001b[1;32m 38\u001b[0m overwrite_data\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 39\u001b[0m )\n\u001b[1;32m 41\u001b[0m out_dir \u001b[39m=\u001b[39m DIRS\u001b[39m.\u001b[39mDATA \u001b[39m/\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mdatasets\u001b[39m\u001b[39m\"\u001b[39m \u001b[39m/\u001b[39m DATASET_ID \u001b[39m/\u001b[39m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mDATASET_ID\u001b[39m}\u001b[39;00m\u001b[39m.db\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 42\u001b[0m dataset \u001b[39m=\u001b[39m load_dataset(out_dir, DIRS)\n", "File \u001b[0;32m~/projects/pykanto/pykanto/dataset.py:123\u001b[0m, in \u001b[0;36mKantoData.__init__\u001b[0;34m(self, DIRS, parameters, random_subset, overwrite_dataset, overwrite_data)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_wav_json_filedirs(random_subset\u001b[39m=\u001b[39mrandom_subset)\n\u001b[1;32m 121\u001b[0m \u001b[39m# TODO: perform a field check for the json files and provide\u001b[39;00m\n\u001b[1;32m 122\u001b[0m \u001b[39m# feedback on any missing/wrong fields, or their value types.\u001b[39;00m\n\u001b[0;32m--> 123\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_load_metadata()\n\u001b[1;32m 124\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_unique_ids()\n\u001b[1;32m 125\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_create_df()\n", "File \u001b[0;32m~/projects/pykanto/pykanto/utils/compute.py:107\u001b[0m, in \u001b[0;36mtiming..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[39m@wraps\u001b[39m(f)\n\u001b[1;32m 105\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mwrap\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m 106\u001b[0m start \u001b[39m=\u001b[39m time()\n\u001b[0;32m--> 107\u001b[0m output \u001b[39m=\u001b[39m f(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 108\u001b[0m end \u001b[39m=\u001b[39m time()\n\u001b[1;32m 109\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpykanto\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdataset\u001b[39;00m \u001b[39mimport\u001b[39;00m KantoData\n", "File \u001b[0;32m~/projects/pykanto/pykanto/dataset.py:244\u001b[0m, in \u001b[0;36mKantoData._load_metadata\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 241\u001b[0m jsons \u001b[39m=\u001b[39m _get_json_parallel(\n\u001b[1;32m 242\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jsonfiles, verbose\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mparameters\u001b[39m.\u001b[39mverbose\n\u001b[1;32m 243\u001b[0m )\n\u001b[0;32m--> 244\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmetadata \u001b[39m=\u001b[39m {Path(json[\u001b[39m\"\u001b[39m\u001b[39mwav_file\u001b[39m\u001b[39m\"\u001b[39m])\u001b[39m.\u001b[39mstem: json \u001b[39mfor\u001b[39;00m json \u001b[39min\u001b[39;00m jsons}\n\u001b[1;32m 246\u001b[0m \u001b[39m# Match wav_file field with actual wav_file location for this dataset\u001b[39;00m\n\u001b[1;32m 247\u001b[0m \u001b[39m# Partially fixes nilomr/pykanto#12\u001b[39;00m\n\u001b[1;32m 248\u001b[0m \u001b[39mfor\u001b[39;00m wavfile \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_wavfiles:\n", "File \u001b[0;32m~/projects/pykanto/pykanto/dataset.py:244\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 241\u001b[0m jsons \u001b[39m=\u001b[39m _get_json_parallel(\n\u001b[1;32m 242\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jsonfiles, verbose\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mparameters\u001b[39m.\u001b[39mverbose\n\u001b[1;32m 243\u001b[0m )\n\u001b[0;32m--> 244\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmetadata \u001b[39m=\u001b[39m {Path(json[\u001b[39m\"\u001b[39;49m\u001b[39mwav_file\u001b[39;49m\u001b[39m\"\u001b[39;49m])\u001b[39m.\u001b[39mstem: json \u001b[39mfor\u001b[39;00m json \u001b[39min\u001b[39;00m jsons}\n\u001b[1;32m 246\u001b[0m \u001b[39m# Match wav_file field with actual wav_file location for this dataset\u001b[39;00m\n\u001b[1;32m 247\u001b[0m \u001b[39m# Partially fixes nilomr/pykanto#12\u001b[39;00m\n\u001b[1;32m 248\u001b[0m \u001b[39mfor\u001b[39;00m wavfile \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_wavfiles:\n", "\u001b[0;31mKeyError\u001b[0m: 'wav_file'" ] } ], "source": [ "# ──── MAIN ────────────────────────────────────────────────────────────────────\n", "# Creates and segments a dataset from raw data\n", "\n", "wav_filepaths, xml_filepaths = [\n", " get_file_paths(DIRS.RAW_DATA, [ext]) for ext in [\".wav\", \".xml\"]\n", "]\n", "files_to_segment = get_wavs_w_annotation(wav_filepaths, xml_filepaths)\n", "\n", "segment_files_parallel(\n", " files_to_segment,\n", " DIRS,\n", " resample=22050,\n", " parser_func=parse_sonic_visualiser_xml,\n", " min_duration=0.1,\n", " min_freqrange=100,\n", " labels_to_ignore=[\"NOISE\"],\n", " verbose=False\n", ")\n", "\n", "chipper_units_to_json(DIRS.SEGMENTED, overwrite_json = True)\n", "\n", "params = Parameters(\n", " window_length=512,\n", " hop_length=32,\n", " n_fft=2048,\n", " num_mel_bins=240,\n", " sr=22050,\n", " top_dB=65,\n", " highcut=10000,\n", " lowcut=0,\n", " dereverb=False,\n", " verbose=False,\n", ")\n", "dataset = KantoData(\n", " DIRS,\n", " parameters=params,\n", " overwrite_dataset=True,\n", " overwrite_data=False,\n", ")\n", "\n", "out_dir = DIRS.DATA / \"datasets\" / DATASET_ID / f\"{DATASET_ID}.db\"\n", "dataset = load_dataset(out_dir, DIRS)\n", "dataset.segment_into_units()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We now have a dataset that contains multiple vocalisations. Let's tell `pykanto`\n", "that we want to look at individual notes, by setting `song_level=False`, and\n", "then plot the spectrogram for a single vocalisation in the dataset." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e86fbab32df74b819e6ee299fef0f950", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Calculating and saving unit spectrograms: 0%| | 0/1 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Tell pykanto to run the analysis at the unit, not song, level\n", "dataset.parameters.update(song_level=False)\n", "dataset.get_units()\n", "\n", "# Choose a song to look at\n", "ID = \"STORM-PETREL\"\n", "key = str(dataset.data.index[9])\n", "\n", "# Plot spectrogram of vocalisation, with note on/offsets\n", "dataset.plot(key, title= 'Storm Petrel Purr Song', segmented=True)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Now that the data are ready, we can extract some features from the spectrograms.\n", "As an example, let's estimate the minimum and maximum frequencies and the\n", "spectral centroid of each note over time using pykanto, and plot the results.\n", "\n", "Here I have selected the final note of a male Storm Petrel's 'purring song':\n", "\n", ">\n", "> The purring sound is given in short bursts, separated by a strange little\n", "> breath note, once famously likened to “a fairy being sick” (C Oldham in\n", "> Lockley 1983). Purring is more often heard coming from the safety of a burrow,\n", "> where the combined phrase of purring plus breath note is repeated\n", "> monotonously, often for minutes on end.
\n", "> [Magnus Robb / The Sound\n", "> Approach](https://soundapproach.co.uk/species/british-storm-petrel/)
\n", "> \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "\n", "# Extract spectrograms of units and get the last note of the song\n", "units = get_unit_spectrograms(dataset, ID)\n", "breath_note = units[key][-1]\n", "\n", "# This returns arrays of floats with a time series of min and max frequencies\n", "minfreqs, maxfreqs = approximate_minmax_frequency(\n", " dataset, spec=breath_note, plot=True, roll_percents=[0.9, 0.05]\n", ")\n", "\n", "# This returns a time series of spectral centroids and badwidths\n", "centroid, bw = spec_centroid_bandwidth(dataset, spec=breath_note, plot=True)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "![output](../custom/freq_bw_fig.jpg)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "from pykanto.signal.segment import ReadWav\n", "from pykanto.signal.spectrogram import retrieve_spectrogram\n", "key = dataset.data.index[0]\n", "\n", "spec = retrieve_spectrogram(dataset.files.at[key, \"spectrogram\"])\n", "wav_object = ReadWav(Path(dataset.files.at[key, \"wav_file\"]))\n", "wavfile, audio_metadata = wav_object.get_wav(), wav_object.as_dict()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "That's just an example using wrappers around a couple of\n", "`[librosa](https://librosa.org/doc/latest/index.html)` functions. In general, if\n", "you need to access a spectrogram for further analysis, you can do so easily by\n", "calling\n", "\n", "```{code-block} python\n", "spec = retrieve_spectrogram(dataset.files.at[key, \"spectrogram\"])\n", "```\n", "\n", "where `key` is the index of the vocalisation you want to access. If you need to\n", "access the audio file itself, you can do it like so:\n", "\n", "```{code-block} python\n", "wav_object = ReadWav(Path(dataset.files.at[key, \"wav_file\"]))\n", "wavfile, audio_metadata = wav_object.get_wav(), wav_object.as_dict()\n", "```\n", "\n", "or using your favourite library that can read wav files." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('pykanto-dev')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "cf30c6a63fc6852a8d910622565c3348d4a7fab8fc38710c97d8db63a595f32d" } } }, "nbformat": 4, "nbformat_minor": 2 }