add main.py
This commit is contained in:
parent
b1e9984cbb
commit
ae51028922
182
main.py
Normal file
182
main.py
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import scipy.io.wavfile as wavfile
|
||||||
|
import plotly.express as px
|
||||||
|
|
||||||
|
|
||||||
|
def load_audio(filename: str, mono: bool = False) -> (np.ndarray, int):
|
||||||
|
audio, sr = librosa.load(filename, sr=None, mono=mono)
|
||||||
|
# sr=None ensures original sampling rate is used
|
||||||
|
return audio, sr
|
||||||
|
|
||||||
|
|
||||||
|
def get_fourier_transform(audio: np.ndarray, sr: int) -> np.ndarray:
|
||||||
|
# Compute the Fourier Transform
|
||||||
|
fft_data = np.fft.fft(audio)
|
||||||
|
|
||||||
|
# Compute the magnitude (amplitude) of the complex numbers
|
||||||
|
fft_magnitude = np.abs(fft_data)
|
||||||
|
|
||||||
|
# Create frequency bins
|
||||||
|
freq = np.fft.fftfreq(len(fft_data), d=1 / sr)
|
||||||
|
|
||||||
|
return freq, fft_magnitude
|
||||||
|
|
||||||
|
|
||||||
|
def plot_audio(audio: np.ndarray, sr: int, secs: int = 10) -> None:
|
||||||
|
if audio.ndim == 1:
|
||||||
|
audio = np.array([audio, audio])
|
||||||
|
sample = audio[:, : (sr * secs)]
|
||||||
|
fig, ax = plt.subplots()
|
||||||
|
ax.plot(sample[0], label="L", c="xkcd:azure", alpha=0.5)
|
||||||
|
ax.plot(sample[1], label="R", c="xkcd:orange", alpha=0.5)
|
||||||
|
plt.savefig(f"out/{secs:03d}.png")
|
||||||
|
|
||||||
|
|
||||||
|
def plot_fourier(audio, secs, sr):
|
||||||
|
print("fourier")
|
||||||
|
freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr)
|
||||||
|
plt.figure(figsize=(10, 6))
|
||||||
|
plt.bar(freq, fft_magnitude, width=1.0) # Using bar plot to mimic histogram
|
||||||
|
plt.xlabel("Frequency (Hz)")
|
||||||
|
plt.ylabel("Magnitude")
|
||||||
|
plt.xscale("log") # Logarithmic scale for frequency axis
|
||||||
|
plt.title("Frequency Spectrum")
|
||||||
|
plt.savefig(f"out/fourier-{secs:03d}.png")
|
||||||
|
|
||||||
|
|
||||||
|
def get_spectra(audio, secs, sr):
|
||||||
|
print("spectra")
|
||||||
|
# NFFT: Number of data points used in each block for the FFT. A larger value provides better frequency resolution but worse time resolution. 2048 is a common choice.
|
||||||
|
# noverlap: The number of points of overlap between blocks. 1024 is half of NFFT, which is a common choice.
|
||||||
|
# Plotting the Spectrogram using plt.specgram
|
||||||
|
plt.figure(figsize=(10, 6))
|
||||||
|
Pxx, freqs, bins, im = plt.specgram(
|
||||||
|
audio[1, : (secs * sr)],
|
||||||
|
Fs=sr,
|
||||||
|
NFFT=2048,
|
||||||
|
noverlap=1024,
|
||||||
|
cmap="viridis",
|
||||||
|
scale="dB",
|
||||||
|
)
|
||||||
|
plt.xlabel("Time (s)")
|
||||||
|
plt.ylabel("Frequency (Hz)")
|
||||||
|
plt.colorbar(im).set_label("Intensity (dB)")
|
||||||
|
plt.title("Spectrogram")
|
||||||
|
plt.savefig(f"out/spectra-{secs:03d}.png")
|
||||||
|
|
||||||
|
return Pxx, freqs, bins, im
|
||||||
|
|
||||||
|
|
||||||
|
def reconstruct_signal(
|
||||||
|
freq, mag, sr: int = 44100, secs: int = 1, normalize: bool = True
|
||||||
|
):
|
||||||
|
# Sample rate and duration of the original audio
|
||||||
|
duration = secs # seconds
|
||||||
|
|
||||||
|
# Create an array for time
|
||||||
|
t = np.linspace(0, duration, duration * sr, endpoint=False)
|
||||||
|
|
||||||
|
# Initialize the reconstructed signal as zeros
|
||||||
|
reconstructed_signal = np.zeros(len(t))
|
||||||
|
|
||||||
|
# Add each dominant frequency as a sine wave
|
||||||
|
for freq, magnitude in zip(freq, mag):
|
||||||
|
# Generate a sine wave for the dominant frequency
|
||||||
|
sine_wave = magnitude * np.sin(2 * np.pi * freq * t)
|
||||||
|
|
||||||
|
# Add the sine wave to the reconstructed signal
|
||||||
|
reconstructed_signal += sine_wave
|
||||||
|
|
||||||
|
# Normalize the reconstructed signal (optional)
|
||||||
|
if normalize:
|
||||||
|
reconstructed_signal /= np.max(np.abs(reconstructed_signal))
|
||||||
|
|
||||||
|
return t, reconstructed_signal
|
||||||
|
|
||||||
|
|
||||||
|
def save_sound(wave: np.ndarray, sr: int = 44100, filename: str = "out.mp3"):
|
||||||
|
wavfile.write(filename, sr, wave.astype(np.float32))
|
||||||
|
|
||||||
|
|
||||||
|
def get_dominant(freq: np.ndarray, fft_magnitude: np.ndarray, N: int = 5):
|
||||||
|
# sr / 2 is the number of frequencies we can resolve (Nyquist Theorem)
|
||||||
|
print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)")
|
||||||
|
N = min(N, len(freq))
|
||||||
|
# these frequencies we can resolve are the positive ones.
|
||||||
|
# negative ones contain phase information (ignored for now)
|
||||||
|
pos_fft_magnitude = fft_magnitude[freq > 0]
|
||||||
|
pos_freq = freq[freq > 0]
|
||||||
|
top_indices = np.argsort(pos_fft_magnitude)[-N:]
|
||||||
|
dominant_frequencies = pos_freq[top_indices]
|
||||||
|
dominant_magnitudes = pos_fft_magnitude[top_indices]
|
||||||
|
# Print the dominant frequencies and their magnitudes
|
||||||
|
for i in range(N):
|
||||||
|
print(
|
||||||
|
f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return dominant_frequencies, dominant_magnitudes
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
filename = "clip.mp3"
|
||||||
|
audio, sr = load_audio(filename)
|
||||||
|
print(audio)
|
||||||
|
print(sr)
|
||||||
|
# secs = len(audio[0]) // sr
|
||||||
|
secs = 10 # number of seconds
|
||||||
|
print(f"Analyzing {filename} ({secs} seconds)")
|
||||||
|
|
||||||
|
plot_audio(audio, sr, secs=secs)
|
||||||
|
# plot_fourier(audio, secs, sr)
|
||||||
|
vol, freqs, time, im = get_spectra(audio, secs, sr)
|
||||||
|
|
||||||
|
# take the logarithm to map data to exponents. new range approx (-50, 0)
|
||||||
|
logvol = np.log(vol)
|
||||||
|
# rescale it to 0 - 1 (relative volume)
|
||||||
|
logvol_scaled = (logvol - logvol.min()) / (logvol.max() - logvol.min())
|
||||||
|
print(audio.shape, vol.shape, freqs.shape, time.shape)
|
||||||
|
print(time)
|
||||||
|
print(freqs)
|
||||||
|
full_vol = logvol_scaled.sum(axis=1)
|
||||||
|
|
||||||
|
# Analysis of frequencies in section of song (as a whole, not over-time)
|
||||||
|
freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr)
|
||||||
|
|
||||||
|
N = 10
|
||||||
|
dominant_frequencies, dominant_magnitudes = get_dominant(freq, fft_magnitude, N=N)
|
||||||
|
|
||||||
|
fig, ax = plt.subplots()
|
||||||
|
# ax.bar(pos_freq, pos_fft_magnitude)
|
||||||
|
ax.bar(dominant_frequencies, dominant_magnitudes)
|
||||||
|
ax.set_title(f"Dominant Frequencies from first {secs}s")
|
||||||
|
fig.savefig("out/dominant.png")
|
||||||
|
|
||||||
|
# Reconstruct signal from dominant frequencies
|
||||||
|
t, new_sig = reconstruct_signal(
|
||||||
|
dominant_frequencies, dominant_magnitudes, sr=sr, secs=secs
|
||||||
|
)
|
||||||
|
save_sound(new_sig, sr=sr, filename="reconstructed_audio.mp3")
|
||||||
|
fig, ax = plt.subplots()
|
||||||
|
ax.set_title("Reconstructed signal")
|
||||||
|
ax.plot(t, new_sig)
|
||||||
|
fig.savefig(f"out/reconstructed_signal_{secs}s.png")
|
||||||
|
fig = px.line(x=t, y=new_sig, title="Reconstructed signal")
|
||||||
|
fig.write_image(f"out/reconstructed_signal_{secs}s.png")
|
||||||
|
fig.write_image(f"out/index.html")
|
||||||
|
|
||||||
|
# bins = np.arange(0, vol.shape[0] + 1, 1).astype(int)
|
||||||
|
|
||||||
|
# fig, ax = plt.subplots()
|
||||||
|
# for t in range(len(time)):
|
||||||
|
# # vol[:, t] = vol[:, t]
|
||||||
|
# # sum up frequencies for each of the bins, using indexing
|
||||||
|
# for i in range(len(bins) - 1):
|
||||||
|
# idx_start = bins[i]
|
||||||
|
# idx_end = bins[i + 1]
|
||||||
|
# sum_vol = np.sum(vol[idx_start:idx_end, t])
|
||||||
|
# ax.plot(time[t], sum_vol, c="xkcd:azure", alpha=0.5)
|
||||||
|
|
||||||
|
# plt.savefig(f"out/test-{secs:03d}.png")
|
Loading…
Reference in New Issue
Block a user