diff --git a/main.py b/main.py index 05c1d60..63a7fb6 100644 --- a/main.py +++ b/main.py @@ -11,9 +11,9 @@ def load_audio(filename: str, mono: bool = False) -> (np.ndarray, int): return audio, sr -def get_fourier_transform(audio: np.ndarray, sr: int) -> np.ndarray: +def get_fourier_transform(audio: np.ndarray, sr: int, n: int = None) -> np.ndarray: # Compute the Fourier Transform - fft_data = np.fft.fft(audio) + fft_data = np.fft.fft(audio, n=n) # Compute the magnitude (amplitude) of the complex numbers fft_magnitude = np.abs(fft_data) @@ -102,22 +102,42 @@ def save_sound(wave: np.ndarray, sr: int = 44100, filename: str = "out.mp3"): def get_dominant(freq: np.ndarray, fft_magnitude: np.ndarray, N: int = 5): # sr / 2 is the number of frequencies we can resolve (Nyquist Theorem) - print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)") N = min(N, len(freq)) # these frequencies we can resolve are the positive ones. # negative ones contain phase information (ignored for now) pos_fft_magnitude = fft_magnitude[freq > 0] pos_freq = freq[freq > 0] - top_indices = np.argsort(pos_fft_magnitude)[-N:] + top_indices = np.argsort(pos_fft_magnitude)[::-1][:N] dominant_frequencies = pos_freq[top_indices] dominant_magnitudes = pos_fft_magnitude[top_indices] - # Print the dominant frequencies and their magnitudes - for i in range(N): - print( - f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}" - ) - return dominant_frequencies, dominant_magnitudes + return dominant_frequencies, dominant_magnitudes, top_indices + + +def custom_specgram(audio, sr, NFFT: int = 2048, noverlap: int = 1024): + step = NFFT - noverlap + segments = int((len(audio) - noverlap) / step) + + specgram = np.zeros((1 + NFFT // 2, segments)) + + for i in range(segments): + # Extract the segment + start = i * step + end = start + NFFT + segment = audio[start:end] + + # Apply FFT + fft_segment = np.fft.fft(segment, n=NFFT) + + # Extract positive frequencies + pos_fft = fft_segment[: NFFT // 2 + 1] + + # Store the magnitudes + specgram[:, i] = np.abs(pos_fft) + + fft_freq = np.fft.fftfreq(NFFT, d=1 / sr) + fft_freq = fft_freq[: NFFT // 2 + 1] + return specgram, fft_freq if __name__ == "__main__": @@ -146,37 +166,123 @@ if __name__ == "__main__": freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr) N = 10 - dominant_frequencies, dominant_magnitudes = get_dominant(freq, fft_magnitude, N=N) - - fig, ax = plt.subplots() - # ax.bar(pos_freq, pos_fft_magnitude) - ax.bar(dominant_frequencies, dominant_magnitudes) - ax.set_title(f"Dominant Frequencies from first {secs}s") - fig.savefig("out/dominant.png") + dominant_frequencies, dominant_magnitudes, _ = get_dominant( + freq, fft_magnitude, N=N + ) # Reconstruct signal from dominant frequencies t, new_sig = reconstruct_signal( dominant_frequencies, dominant_magnitudes, sr=sr, secs=secs ) save_sound(new_sig, sr=sr, filename="reconstructed_audio.mp3") + + # Image fig, ax = plt.subplots() ax.set_title("Reconstructed signal") ax.plot(t, new_sig) fig.savefig(f"out/reconstructed_signal_{secs}s.png") fig = px.line(x=t, y=new_sig, title="Reconstructed signal") fig.write_image(f"out/reconstructed_signal_{secs}s.png") - fig.write_image(f"out/index.html") + fig.write_html(f"out/index.html") - # bins = np.arange(0, vol.shape[0] + 1, 1).astype(int) + print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)") + # Print the dominant frequencies and their magnitudes + for i in range(N): + print( + f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}" + ) - # fig, ax = plt.subplots() - # for t in range(len(time)): - # # vol[:, t] = vol[:, t] - # # sum up frequencies for each of the bins, using indexing - # for i in range(len(bins) - 1): - # idx_start = bins[i] - # idx_end = bins[i + 1] - # sum_vol = np.sum(vol[idx_start:idx_end, t]) - # ax.plot(time[t], sum_vol, c="xkcd:azure", alpha=0.5) + fig, ax = plt.subplots() + # ax.bar(pos_freq, pos_fft_magnitude) + ax.bar( + dominant_frequencies, + dominant_magnitudes / dominant_magnitudes.max(), + color="red", + ) + ax.set_title(f"Dominant Frequencies from first {secs}s") + fig.savefig("out/dominant.png") - # plt.savefig(f"out/test-{secs:03d}.png") + secs = 10 + sp, f = custom_specgram(audio[0, : (secs * sr)], sr) + fig, ax = plt.subplots() + sp = np.log(sp) + ax.imshow(sp, aspect="auto", origin="lower") + fig.savefig(f"out/spec-{secs}.png") + + N = 10 + # Now getting it over each time slice + fig, ax = plt.subplots() + for i in range(sp.shape[1]): + # ax.bar(pos_freq, pos_fft_magnitude) + dominant_frequencies, dominant_magnitudes, _ = get_dominant(f, sp[:, i], N=N) + _ = ax.bar( + dominant_frequencies, dominant_magnitudes / dominant_magnitudes.max() + ) + ax.set_title(f"Dominant Frequencies over time") + fig.savefig("out/time_dominant.png") + + # Let's make an animation. Each frame is a time slice + # get viridis colors + import matplotlib.cm as cm + + cmap = cm.get_cmap("viridis") + # assign each frequency a color (f) + top_freq, _, top_indices = get_dominant(f, sp.sum(axis=1), N=50) + sort_idx = np.argsort(top_freq) + # frequencies correspond to colors. + colors = cmap(top_freq[sort_idx] / top_freq.max()) + + def make_frame(f, sp, colors, i, N, top_indices, sort_idx): + fig, ax = plt.subplots(subplot_kw=dict(projection="polar")) + __, _, indices = get_dominant(f, sp[:, i], N=N) + # draw a circle, with each slice representing a frequency. + # each slice is colored according to the frequency's color + # only the dominant frequencies are shown in each frame. + # each wedge is a frequency, and its radius is the relative magnitude + + # get the colors of the dominant frequencies + C = len(colors) + # colors = colors[indices] + rel_mag = np.zeros(C) + 0.5 + # remove elements of `indices` that are not in `top_indices` + idx = indices[np.isin(indices, top_indices)] + # print(idx, indices, top_indices, sort_idx) + if len(idx) > 0: + # find the location of idx in top_indices + idx = np.where(top_indices == idx[:, None])[1] + rel_mag[idx] = sp[idx, i] / sp[idx, i].max() + 1 + # rel_mag[~idx] = sp[~idx, i] / sp[~idx, i].max() + # draw the wedges. each has equal angle, and the radius is the relative magnitude + ax.bar( + np.arange(C) * 2 * np.pi / C, + rel_mag[sort_idx], + width=2 * np.pi / C, + color=colors, + alpha=1, + ) + ax.set_title(f"{i:06d}") + ax.axis("off") + ax.set_ylim(0, 2) + out_path = f"out/frames/frame-{i:06d}.png" + fig.savefig(out_path) + return out_path + + make_frame(f, sp, colors, 426, N, top_indices, sort_idx) + + # slow... + # frames = list(map(lambda i: make_frame(f, sp, colors, i, N, top_indices), range(sp.shape[1]))) + + # faster + import multiprocessing as mp + + pool = mp.Pool(mp.cpu_count()) + frames = pool.starmap( + make_frame, + [(f, sp, colors, i, N, top_indices, sort_idx) for i in range(sp.shape[1])], + ) + pool.close() + import os + + os.system( + "ffmpeg -r 10 -f image2 -s 1920x1080 -i out/frames/frame-%06d.png -vcodec libx264 -crf 25 -pix_fmt yuv420p test.mp4" + )