From ae510289226ce322e6cf603122cbc180f96cd409 Mon Sep 17 00:00:00 2001 From: Mathematical Michael Date: Mon, 8 Jan 2024 22:38:45 -0700 Subject: [PATCH] add main.py --- main.py | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 main.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..05c1d60 --- /dev/null +++ b/main.py @@ -0,0 +1,182 @@ +import librosa +import numpy as np +import matplotlib.pyplot as plt +import scipy.io.wavfile as wavfile +import plotly.express as px + + +def load_audio(filename: str, mono: bool = False) -> (np.ndarray, int): + audio, sr = librosa.load(filename, sr=None, mono=mono) + # sr=None ensures original sampling rate is used + return audio, sr + + +def get_fourier_transform(audio: np.ndarray, sr: int) -> np.ndarray: + # Compute the Fourier Transform + fft_data = np.fft.fft(audio) + + # Compute the magnitude (amplitude) of the complex numbers + fft_magnitude = np.abs(fft_data) + + # Create frequency bins + freq = np.fft.fftfreq(len(fft_data), d=1 / sr) + + return freq, fft_magnitude + + +def plot_audio(audio: np.ndarray, sr: int, secs: int = 10) -> None: + if audio.ndim == 1: + audio = np.array([audio, audio]) + sample = audio[:, : (sr * secs)] + fig, ax = plt.subplots() + ax.plot(sample[0], label="L", c="xkcd:azure", alpha=0.5) + ax.plot(sample[1], label="R", c="xkcd:orange", alpha=0.5) + plt.savefig(f"out/{secs:03d}.png") + + +def plot_fourier(audio, secs, sr): + print("fourier") + freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr) + plt.figure(figsize=(10, 6)) + plt.bar(freq, fft_magnitude, width=1.0) # Using bar plot to mimic histogram + plt.xlabel("Frequency (Hz)") + plt.ylabel("Magnitude") + plt.xscale("log") # Logarithmic scale for frequency axis + plt.title("Frequency Spectrum") + plt.savefig(f"out/fourier-{secs:03d}.png") + + +def get_spectra(audio, secs, sr): + print("spectra") + # NFFT: Number of data points used in each block for the FFT. A larger value provides better frequency resolution but worse time resolution. 2048 is a common choice. + # noverlap: The number of points of overlap between blocks. 1024 is half of NFFT, which is a common choice. + # Plotting the Spectrogram using plt.specgram + plt.figure(figsize=(10, 6)) + Pxx, freqs, bins, im = plt.specgram( + audio[1, : (secs * sr)], + Fs=sr, + NFFT=2048, + noverlap=1024, + cmap="viridis", + scale="dB", + ) + plt.xlabel("Time (s)") + plt.ylabel("Frequency (Hz)") + plt.colorbar(im).set_label("Intensity (dB)") + plt.title("Spectrogram") + plt.savefig(f"out/spectra-{secs:03d}.png") + + return Pxx, freqs, bins, im + + +def reconstruct_signal( + freq, mag, sr: int = 44100, secs: int = 1, normalize: bool = True +): + # Sample rate and duration of the original audio + duration = secs # seconds + + # Create an array for time + t = np.linspace(0, duration, duration * sr, endpoint=False) + + # Initialize the reconstructed signal as zeros + reconstructed_signal = np.zeros(len(t)) + + # Add each dominant frequency as a sine wave + for freq, magnitude in zip(freq, mag): + # Generate a sine wave for the dominant frequency + sine_wave = magnitude * np.sin(2 * np.pi * freq * t) + + # Add the sine wave to the reconstructed signal + reconstructed_signal += sine_wave + + # Normalize the reconstructed signal (optional) + if normalize: + reconstructed_signal /= np.max(np.abs(reconstructed_signal)) + + return t, reconstructed_signal + + +def save_sound(wave: np.ndarray, sr: int = 44100, filename: str = "out.mp3"): + wavfile.write(filename, sr, wave.astype(np.float32)) + + +def get_dominant(freq: np.ndarray, fft_magnitude: np.ndarray, N: int = 5): + # sr / 2 is the number of frequencies we can resolve (Nyquist Theorem) + print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)") + N = min(N, len(freq)) + # these frequencies we can resolve are the positive ones. + # negative ones contain phase information (ignored for now) + pos_fft_magnitude = fft_magnitude[freq > 0] + pos_freq = freq[freq > 0] + top_indices = np.argsort(pos_fft_magnitude)[-N:] + dominant_frequencies = pos_freq[top_indices] + dominant_magnitudes = pos_fft_magnitude[top_indices] + # Print the dominant frequencies and their magnitudes + for i in range(N): + print( + f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}" + ) + + return dominant_frequencies, dominant_magnitudes + + +if __name__ == "__main__": + filename = "clip.mp3" + audio, sr = load_audio(filename) + print(audio) + print(sr) + # secs = len(audio[0]) // sr + secs = 10 # number of seconds + print(f"Analyzing {filename} ({secs} seconds)") + + plot_audio(audio, sr, secs=secs) + # plot_fourier(audio, secs, sr) + vol, freqs, time, im = get_spectra(audio, secs, sr) + + # take the logarithm to map data to exponents. new range approx (-50, 0) + logvol = np.log(vol) + # rescale it to 0 - 1 (relative volume) + logvol_scaled = (logvol - logvol.min()) / (logvol.max() - logvol.min()) + print(audio.shape, vol.shape, freqs.shape, time.shape) + print(time) + print(freqs) + full_vol = logvol_scaled.sum(axis=1) + + # Analysis of frequencies in section of song (as a whole, not over-time) + freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr) + + N = 10 + dominant_frequencies, dominant_magnitudes = get_dominant(freq, fft_magnitude, N=N) + + fig, ax = plt.subplots() + # ax.bar(pos_freq, pos_fft_magnitude) + ax.bar(dominant_frequencies, dominant_magnitudes) + ax.set_title(f"Dominant Frequencies from first {secs}s") + fig.savefig("out/dominant.png") + + # Reconstruct signal from dominant frequencies + t, new_sig = reconstruct_signal( + dominant_frequencies, dominant_magnitudes, sr=sr, secs=secs + ) + save_sound(new_sig, sr=sr, filename="reconstructed_audio.mp3") + fig, ax = plt.subplots() + ax.set_title("Reconstructed signal") + ax.plot(t, new_sig) + fig.savefig(f"out/reconstructed_signal_{secs}s.png") + fig = px.line(x=t, y=new_sig, title="Reconstructed signal") + fig.write_image(f"out/reconstructed_signal_{secs}s.png") + fig.write_image(f"out/index.html") + + # bins = np.arange(0, vol.shape[0] + 1, 1).astype(int) + + # fig, ax = plt.subplots() + # for t in range(len(time)): + # # vol[:, t] = vol[:, t] + # # sum up frequencies for each of the bins, using indexing + # for i in range(len(bins) - 1): + # idx_start = bins[i] + # idx_end = bins[i + 1] + # sum_vol = np.sum(vol[idx_start:idx_end, t]) + # ax.plot(time[t], sum_vol, c="xkcd:azure", alpha=0.5) + + # plt.savefig(f"out/test-{secs:03d}.png")