.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "beginner/audio_feature_augmentation_tutorial.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_beginner_audio_feature_augmentation_tutorial.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_beginner_audio_feature_augmentation_tutorial.py:


Audio Feature Augmentation
==========================

.. GENERATED FROM PYTHON SOURCE LINES 6-18

.. code-block:: default


    # When running this tutorial in Google Colab, install the required packages
    # with the following.
    # !pip install torchaudio librosa

    import torch
    import torchaudio
    import torchaudio.transforms as T

    # Show which torch and torchaudio versions this run is using.
    print(torch.__version__)
    print(torchaudio.__version__)


.. GENERATED FROM PYTHON SOURCE LINES 19-22

Preparing data and utility functions (skip this section)
--------------------------------------------------------


.. GENERATED FROM PYTHON SOURCE LINES 22-108

.. code-block:: default


    # @title Prepare data and utility functions. {display-mode: "form"}
    # @markdown
    # @markdown You do not need to look into this cell.
    # @markdown Just execute once and you are good to go.
    # @markdown
    # @markdown In this tutorial, we will use speech data from the [VOiCES dataset](https://iqtlabs.github.io/voices/),
    # @markdown which is licensed under Creative Commons BY 4.0.

    # -------------------------------------------------------------------------------
    # Preparation of data and helper functions.
    # -------------------------------------------------------------------------------

    import os

    import librosa
    import matplotlib.pyplot as plt
    import requests


    # Local directory (relative to the working directory) that holds downloaded assets.
    _SAMPLE_DIR = "_assets"

    # Remote location of the speech sample and the local path it is saved to.
    SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"  # noqa: E501
    SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav")

    # Create the asset directory up front; exist_ok=True makes re-running this cell harmless.
    os.makedirs(_SAMPLE_DIR, exist_ok=True)


    def _fetch_data():
        """Download every sample asset to its local path.

        Each ``(url, path)`` pair is fetched with ``requests`` and the response
        body is written to ``path`` in binary mode.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.Timeout: If the server does not respond within the timeout.
        """
        uri = [
            (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH),
        ]
        for url, path in uri:
            # Fetch and validate *before* opening the destination, so a failed
            # request neither truncates an existing file nor stores an HTTP
            # error page under a ``.wav`` name.
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            with open(path, "wb") as file_:
                file_.write(response.content)


    _fetch_data()


    def _get_sample(path, resample=None):
        """Load ``path`` through a sox effect chain and return the result.

        The chain always starts with ``remix 1``. When ``resample`` is truthy,
        a ``lowpass`` at half of ``resample`` and a ``rate`` change to
        ``resample`` are appended.

        Returns whatever ``torchaudio.sox_effects.apply_effects_file`` returns
        (callers in this file unpack it into two values).
        """
        chain = [["remix", "1"]]
        if resample:
            # Low-pass at half the target rate first, then change the rate.
            chain += [
                ["lowpass", str(resample // 2)],
                ["rate", str(resample)],
            ]
        return torchaudio.sox_effects.apply_effects_file(path, effects=chain)


    def get_speech_sample(*, resample=None):
        """Load the speech sample stored at ``SAMPLE_WAV_SPEECH_PATH``.

        ``resample`` is keyword-only and is forwarded unchanged to
        ``_get_sample``; the return value is whatever ``_get_sample`` returns.
        """
        sample = _get_sample(SAMPLE_WAV_SPEECH_PATH, resample=resample)
        return sample


    def get_spectrogram(n_fft=400, win_len=None, hop_len=None, power=2.0):
        """Compute a spectrogram of the speech sample with ``T.Spectrogram``.

        Args:
            n_fft: Forwarded as ``n_fft``.
            win_len: Forwarded as ``win_length``.
            hop_len: Forwarded as ``hop_length``.
            power: Forwarded as ``power``.

        Returns:
            The output of the ``T.Spectrogram`` transform applied to the
            first item returned by ``get_speech_sample()``.
        """
        # Only the first item of the loaded sample is used here.
        speech, _unused = get_speech_sample()
        settings = {
            "n_fft": n_fft,
            "win_length": win_len,
            "hop_length": hop_len,
            "center": True,
            "pad_mode": "reflect",
            "power": power,
        }
        to_spectrogram = T.Spectrogram(**settings)
        return to_spectrogram(speech)


    def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
        """Draw ``spec`` as a dB-scaled image with a colorbar.

        Args:
            spec: Spectrogram to draw; it is converted with
                ``librosa.power_to_db`` before being shown.
            title: Figure title; a falsy value falls back to "Spectrogram (db)".
            ylabel: Label of the y axis.
            aspect: Forwarded to ``imshow`` as ``aspect``.
            xmax: When truthy, the x axis is limited to ``(0, xmax)``.
        """
        if not title:
            title = "Spectrogram (db)"

        figure, ax = plt.subplots()
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xlabel("frame")

        spec_db = librosa.power_to_db(spec)
        image = ax.imshow(spec_db, origin="lower", aspect=aspect)
        if xmax:
            ax.set_xlim(0, xmax)
        figure.colorbar(image, ax=ax)
        # Non-blocking so that the script keeps running after the figure is shown.
        plt.show(block=False)



.. GENERATED FROM PYTHON SOURCE LINES 109-119

SpecAugment
-----------

`SpecAugment <https://ai.googleblog.com/2019/04/specaugment-new-data-augmentation.html>`__
is a popular spectrogram augmentation technique.

``torchaudio`` implements :py:class:`torchaudio.transforms.TimeStretch`,
:py:class:`torchaudio.transforms.TimeMasking` and
:py:class:`torchaudio.transforms.FrequencyMasking`.


.. GENERATED FROM PYTHON SOURCE LINES 121-124

TimeStretch
-----------


.. GENERATED FROM PYTHON SOURCE LINES 124-139

.. code-block:: default



    # power=None is forwarded to T.Spectrogram; the result is handled as a
    # complex-valued spectrogram below (torch.abs is used to get magnitudes).
    spec = get_spectrogram(power=None)
    stretch = T.TimeStretch()

    # plot_spectrogram converts its input with librosa.power_to_db, so it must
    # be given power (|X|^2) rather than magnitude (|X|); passing the magnitude
    # would halve every dB value shown in the plot.
    rate = 1.2
    spec_ = stretch(spec, rate)
    plot_spectrogram(torch.abs(spec_[0]).pow(2), title=f"Stretched x{rate}", aspect="equal", xmax=304)

    plot_spectrogram(torch.abs(spec[0]).pow(2), title="Original", aspect="equal", xmax=304)

    rate = 0.9
    spec_ = stretch(spec, rate)
    plot_spectrogram(torch.abs(spec_[0]).pow(2), title=f"Stretched x{rate}", aspect="equal", xmax=304)


.. GENERATED FROM PYTHON SOURCE LINES 140-143

TimeMasking
-----------


.. GENERATED FROM PYTHON SOURCE LINES 143-154

.. code-block:: default


    # Fix the global torch seed before masking (presumably so that the mask
    # position is the same on every run -- confirm against the torchaudio docs).
    torch.random.manual_seed(4)

    # Default arguments: power spectrogram (power=2.0), plotted before masking.
    spec = get_spectrogram()
    plot_spectrogram(spec[0], title="Original")

    # NOTE(review): time_mask_param looks like the upper bound on the mask
    # length in frames -- confirm against the TimeMasking documentation.
    masking = T.TimeMasking(time_mask_param=80)
    spec = masking(spec)

    plot_spectrogram(spec[0], title="Masked along time axis")


.. GENERATED FROM PYTHON SOURCE LINES 155-158

FrequencyMasking
----------------


.. GENERATED FROM PYTHON SOURCE LINES 158-169

.. code-block:: default



    # Fix the global torch seed before masking (presumably so that the mask
    # position is the same on every run -- confirm against the torchaudio docs).
    torch.random.manual_seed(4)

    # Default arguments: power spectrogram (power=2.0), plotted before masking.
    spec = get_spectrogram()
    plot_spectrogram(spec[0], title="Original")

    # NOTE(review): freq_mask_param looks like the upper bound on the mask
    # height in frequency bins -- confirm against the FrequencyMasking documentation.
    masking = T.FrequencyMasking(freq_mask_param=80)
    spec = masking(spec)

    plot_spectrogram(spec[0], title="Masked along frequency axis")


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  0.000 seconds)


.. _sphx_glr_download_beginner_audio_feature_augmentation_tutorial.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example


    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: audio_feature_augmentation_tutorial.py <audio_feature_augmentation_tutorial.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: audio_feature_augmentation_tutorial.ipynb <audio_feature_augmentation_tutorial.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_