basic animation

2024-01-09 01:21:26 -07:00 · 2024-01-09 01:21:26 -07:00 · 01965030d5
commit 01965030d5
parent ae51028922
1 changed files with 135 additions and 29 deletions
--- a/main.py
+++ b/main.py
@ -11,9 +11,9 @@ def load_audio(filename: str, mono: bool = False) -> (np.ndarray, int):
    return audio, sr


-def get_fourier_transform(audio: np.ndarray, sr: int) -> np.ndarray:
+def get_fourier_transform(audio: np.ndarray, sr: int, n: int = None) -> np.ndarray:
    # Compute the Fourier Transform
-    fft_data = np.fft.fft(audio)
+    fft_data = np.fft.fft(audio, n=n)

    # Compute the magnitude (amplitude) of the complex numbers
    fft_magnitude = np.abs(fft_data)
@ -102,22 +102,42 @@ def save_sound(wave: np.ndarray, sr: int = 44100, filename: str = "out.mp3"):

 def get_dominant(freq: np.ndarray, fft_magnitude: np.ndarray, N: int = 5):
    # sr / 2 is the number of frequencies we can resolve (Nyquist Theorem)
-    print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)")
    N = min(N, len(freq))
    # these frequencies we can resolve are the positive ones.
    # negative ones contain phase information (ignored for now)
    pos_fft_magnitude = fft_magnitude[freq > 0]
    pos_freq = freq[freq > 0]
-    top_indices = np.argsort(pos_fft_magnitude)[-N:]
+    top_indices = np.argsort(pos_fft_magnitude)[::-1][:N]
    dominant_frequencies = pos_freq[top_indices]
    dominant_magnitudes = pos_fft_magnitude[top_indices]
-    # Print the dominant frequencies and their magnitudes
-    for i in range(N):
-        print(
-            f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}"
-        )

-    return dominant_frequencies, dominant_magnitudes
+    return dominant_frequencies, dominant_magnitudes, top_indices
+
+
+def custom_specgram(audio, sr, NFFT: int = 2048, noverlap: int = 1024):
+    step = NFFT - noverlap
+    segments = int((len(audio) - noverlap) / step)
+
+    specgram = np.zeros((1 + NFFT // 2, segments))
+
+    for i in range(segments):
+        # Extract the segment
+        start = i * step
+        end = start + NFFT
+        segment = audio[start:end]
+
+        # Apply FFT
+        fft_segment = np.fft.fft(segment, n=NFFT)
+
+        # Extract positive frequencies
+        pos_fft = fft_segment[: NFFT // 2 + 1]
+
+        # Store the magnitudes
+        specgram[:, i] = np.abs(pos_fft)
+
+    fft_freq = np.fft.fftfreq(NFFT, d=1 / sr)
+    fft_freq = fft_freq[: NFFT // 2 + 1]
+    return specgram, fft_freq


 if __name__ == "__main__":
@ -146,37 +166,123 @@ if __name__ == "__main__":
    freq, fft_magnitude = get_fourier_transform(audio[0, : (secs * sr)], sr)

    N = 10
-    dominant_frequencies, dominant_magnitudes = get_dominant(freq, fft_magnitude, N=N)
-
-    fig, ax = plt.subplots()
-    # ax.bar(pos_freq, pos_fft_magnitude)
-    ax.bar(dominant_frequencies, dominant_magnitudes)
-    ax.set_title(f"Dominant Frequencies from first {secs}s")
-    fig.savefig("out/dominant.png")
+    dominant_frequencies, dominant_magnitudes, _ = get_dominant(
+        freq, fft_magnitude, N=N
+    )

    # Reconstruct signal from dominant frequencies
    t, new_sig = reconstruct_signal(
        dominant_frequencies, dominant_magnitudes, sr=sr, secs=secs
    )
    save_sound(new_sig, sr=sr, filename="reconstructed_audio.mp3")
+
+    # Image
    fig, ax = plt.subplots()
    ax.set_title("Reconstructed signal")
    ax.plot(t, new_sig)
    fig.savefig(f"out/reconstructed_signal_{secs}s.png")
    fig = px.line(x=t, y=new_sig, title="Reconstructed signal")
    fig.write_image(f"out/reconstructed_signal_{secs}s.png")
-    fig.write_image(f"out/index.html")
+    fig.write_html(f"out/index.html")

-    # bins = np.arange(0, vol.shape[0] + 1, 1).astype(int)
+    print(f"Using top {N} frequencies ({100*N/sr/2:2.2f}%)")
+    # Print the dominant frequencies and their magnitudes
+    for i in range(N):
+        print(
+            f"Dominant Frequency {i + 1}: {dominant_frequencies[i]} Hz, Magnitude: {dominant_magnitudes[i]}"
+        )

-    # fig, ax = plt.subplots()
-    # for t in range(len(time)):
-    #     # vol[:, t] = vol[:, t]
-    #     # sum up frequencies for each of the bins, using indexing
-    #     for i in range(len(bins) - 1):
-    #         idx_start = bins[i]
-    #         idx_end = bins[i + 1]
-    #         sum_vol = np.sum(vol[idx_start:idx_end, t])
-    #         ax.plot(time[t], sum_vol, c="xkcd:azure", alpha=0.5)
+    fig, ax = plt.subplots()
+    # ax.bar(pos_freq, pos_fft_magnitude)
+    ax.bar(
+        dominant_frequencies,
+        dominant_magnitudes / dominant_magnitudes.max(),
+        color="red",
+    )
+    ax.set_title(f"Dominant Frequencies from first {secs}s")
+    fig.savefig("out/dominant.png")

-    # plt.savefig(f"out/test-{secs:03d}.png")
+    secs = 10
+    sp, f = custom_specgram(audio[0, : (secs * sr)], sr)
+    fig, ax = plt.subplots()
+    sp = np.log(sp)
+    ax.imshow(sp, aspect="auto", origin="lower")
+    fig.savefig(f"out/spec-{secs}.png")
+
+    N = 10
+    # Now getting it over each time slice
+    fig, ax = plt.subplots()
+    for i in range(sp.shape[1]):
+        # ax.bar(pos_freq, pos_fft_magnitude)
+        dominant_frequencies, dominant_magnitudes, _ = get_dominant(f, sp[:, i], N=N)
+        _ = ax.bar(
+            dominant_frequencies, dominant_magnitudes / dominant_magnitudes.max()
+        )
+    ax.set_title(f"Dominant Frequencies over time")
+    fig.savefig("out/time_dominant.png")
+
+    # Let's make an animation. Each frame is a time slice
+    # get viridis colors
+    import matplotlib.cm as cm
+
+    cmap = cm.get_cmap("viridis")
+    # assign each frequency a color (f)
+    top_freq, _, top_indices = get_dominant(f, sp.sum(axis=1), N=50)
+    sort_idx = np.argsort(top_freq)
+    # frequencies correspond to colors.
+    colors = cmap(top_freq[sort_idx] / top_freq.max())
+
+    def make_frame(f, sp, colors, i, N, top_indices, sort_idx):
+        fig, ax = plt.subplots(subplot_kw=dict(projection="polar"))
+        __, _, indices = get_dominant(f, sp[:, i], N=N)
+        # draw a circle, with each slice representing a frequency.
+        # each slice is colored according to the frequency's color
+        # only the dominant frequencies are shown in each frame.
+        # each wedge is a frequency, and its radius is the relative magnitude
+
+        # get the colors of the dominant frequencies
+        C = len(colors)
+        # colors = colors[indices]
+        rel_mag = np.zeros(C) + 0.5
+        # remove elements of `indices` that are not in `top_indices`
+        idx = indices[np.isin(indices, top_indices)]
+        # print(idx, indices, top_indices, sort_idx)
+        if len(idx) > 0:
+            # find the location of idx in top_indices
+            idx = np.where(top_indices == idx[:, None])[1]
+            rel_mag[idx] = sp[idx, i] / sp[idx, i].max() + 1
+            # rel_mag[~idx] = sp[~idx, i] / sp[~idx, i].max()
+        # draw the wedges. each has equal angle, and the radius is the relative magnitude
+        ax.bar(
+            np.arange(C) * 2 * np.pi / C,
+            rel_mag[sort_idx],
+            width=2 * np.pi / C,
+            color=colors,
+            alpha=1,
+        )
+        ax.set_title(f"{i:06d}")
+        ax.axis("off")
+        ax.set_ylim(0, 2)
+        out_path = f"out/frames/frame-{i:06d}.png"
+        fig.savefig(out_path)
+        return out_path
+
+    make_frame(f, sp, colors, 426, N, top_indices, sort_idx)
+
+    # slow...
+    # frames = list(map(lambda i: make_frame(f, sp, colors, i, N, top_indices), range(sp.shape[1])))
+
+    # faster
+    import multiprocessing as mp
+
+    pool = mp.Pool(mp.cpu_count())
+    frames = pool.starmap(
+        make_frame,
+        [(f, sp, colors, i, N, top_indices, sort_idx) for i in range(sp.shape[1])],
+    )
+    pool.close()
+    import os
+
+    os.system(
+        "ffmpeg -r 10 -f image2 -s 1920x1080 -i out/frames/frame-%06d.png -vcodec libx264 -crf 25  -pix_fmt yuv420p test.mp4"
+    )