Merge pull request #12 from kennethnym/fal-integration

Add code for fal.ai integration
2024-08-25 17:08:48 +01:00
parent 7ea7765404 3299f2dcc6
commit 280aa1dcb4
5 changed files with 133 additions and 47 deletions
--- a/fal_app.py
+++ b/fal_app.py
@@ -0,0 +1,73 @@
 import datetime
 from pathlib import Path
 import threading
 from audiocraft.data.audio import audio_write
 import fal
 from fastapi import Response, status
 import torch
 # Directory the generated clips are written to and read back from; the app
 # below stores clip i as "<DATA_DIR>/<i>.mp3".
 DATA_DIR = Path("/data/audio")
 # Text prompts handed to MusicGen in a single generate() call; the app writes
 # one clip per returned waveform, numbered by position.
 # NOTE(review): these are the same five prompts as prompts.PROMPTS; presumably
 # duplicated here so this module has no project-local import -- confirm.
 PROMPTS = [
    "Create a futuristic lo-fi beat that blends modern electronic elements with synthwave influences. Incorporate smooth, atmospheric synths and gentle, relaxing rhythms to evoke a sense of a serene, neon-lit future. Ensure  the track is continuous with no background noise or interruptions, maintaining a calm and tranquil atmosphere throughout while adding a touch of retro-futuristic vibes.",
    "gentle lo-fi beat with a smooth, mellow piano melody in the background. Ensure there are no background noises or interruptions, maintaining a continuous and seamless flow throughout the track. The beat should be relaxing and tranquil, perfect for a calm and reflective atmosphere.",
    "Create an earthy lo-fi beat that evokes a natural, grounded atmosphere. Incorporate organic sounds like soft percussion, rustling leaves, and gentle acoustic instruments. The track should have a warm, soothing rhythm with a continuous flow and no background noise or interruptions, maintaining a calm and reflective ambiance throughout.",
    "Create a soothing lo-fi beat featuring gentle, melodic guitar riffs. The guitar should be the focal point, supported by subtle, ambient electronic elements and a smooth, relaxed rhythm. Ensure the track is continuous with no background noise or interruptions, maintaining a warm and mellow atmosphere throughout.",
    "Create an ambient lo-fi beat with a tranquil and ethereal atmosphere. Use soft, atmospheric pads, gentle melodies, and minimalistic percussion to evoke a sense of calm and serenity. Ensure the track is continuous with no background noise or interruptions, maintaining a soothing and immersive ambiance throughout.",
 ]
 class InfinifiFalApp(fal.App, keep_alive=300):
    """fal app that generates MusicGen clips in the background and serves them.

    ``/generate`` starts one background generation job for all ``PROMPTS`` and
    answers 409 while a job is already in flight. ``/clips/{index}`` returns
    the bytes of clip ``index`` as written by the last job, or 404 while a job
    is running or when the clip does not exist.
    """

    machine_type = "GPU-A6000"
    requirements = [
        "torch==2.1.0",
        "audiocraft==1.3.0",
        "torchaudio==2.1.0",
        "websockets==11.0.3",
        "numpy==1.26.4",
    ]

    # True from the moment a generation job is accepted until it finishes
    # (successfully or not). Both endpoints consult it.
    __is_generating = False

    def setup(self):
        """Load the MusicGen model and create the lock guarding the busy flag."""
        import torchaudio
        from audiocraft.models.musicgen import MusicGen

        # The lock is created per instance here instead of as a class attribute.
        # NOTE(review): this assumes fal calls setup() before serving requests
        # and keeps a non-picklable lock out of the class body -- confirm.
        self.__state_lock = threading.Lock()
        self.model = MusicGen.get_pretrained("facebook/musicgen-large")
        self.model.set_generation_params(duration=60)

    @fal.endpoint("/generate")
    def run(self):
        """Start a background generation job unless one is already running.

        Returns:
            A 409 ``Response`` when a job is in flight; otherwise ``None``
            after the worker thread has been started.
        """
        # Check and claim the busy flag atomically, *before* the thread starts,
        # so two near-simultaneous requests cannot both launch a generation.
        with self.__state_lock:
            if self.__is_generating:
                return Response(status_code=status.HTTP_409_CONFLICT)
            self.__is_generating = True

        try:
            threading.Thread(target=self.__generate_audio).start()
        except Exception:
            # The worker never ran, so nothing else will release the flag.
            self.__is_generating = False
            raise

    @fal.endpoint("/clips/{index}")
    def get_clips(self, index):
        """Return the MP3 bytes of clip ``index``.

        Args:
            index: Zero-based clip number taken from the URL path.

        Returns:
            A ``Response`` carrying the file contents, or a 404 ``Response``
            when a generation job is running, ``index`` is not a valid clip
            number, or the clip file has not been written yet.
        """
        not_found = Response(status_code=status.HTTP_404_NOT_FOUND)
        if self.__is_generating:
            return not_found

        # ``index`` comes straight from the request path: accept only integers
        # that address one of the generated clips, so the value can never be
        # used to walk outside DATA_DIR.
        try:
            clip_index = int(index)
        except (TypeError, ValueError):
            return not_found
        if not 0 <= clip_index < len(PROMPTS):
            return not_found

        clip_path = DATA_DIR.joinpath(f"{clip_index}.mp3")
        try:
            data = clip_path.read_bytes()
        except FileNotFoundError:
            # Nothing generated yet for this slot; report it as missing rather
            # than letting the request fail with a server error.
            return not_found
        return Response(content=data)

    def __generate_audio(self):
        """Generate one clip per prompt and write them to ``DATA_DIR`` as MP3.

        Runs on the worker thread started by ``run``. The busy flag is always
        cleared on exit so a failed job does not block later requests.
        """
        self.__is_generating = True
        try:
            print(f"[INFO] {datetime.datetime.now()}: generating audio...")
            wav = self.model.generate(PROMPTS)
            for i, one_wav in enumerate(wav):
                # audio_write receives the stem path; format="mp3" selects the
                # output format (get_clips reads the file back as "<i>.mp3").
                path = DATA_DIR.joinpath(f"{i}")
                audio_write(
                    path,
                    one_wav.cpu(),
                    self.model.sample_rate,
                    format="mp3",
                    strategy="loudness",
                    loudness_compressor=True,
                    make_parent_dir=True,
                )
        finally:
            self.__is_generating = False
--- a/generate.py
+++ b/generate.py
@@ -2,22 +2,17 @@ import torchaudio
 from audiocraft.models.musicgen import MusicGen
 from audiocraft.data.audio import audio_write
 from prompts import PROMPTS
 MODEL_NAME = "facebook/musicgen-large"
 MUSIC_DURATION_SECONDS = 60
 model = MusicGen.get_pretrained(MODEL_NAME)
 model.set_generation_params(duration=MUSIC_DURATION_SECONDS)
 descriptions = [
    "Create a futuristic lo-fi beat that blends modern electronic elements with synthwave influences. Incorporate smooth, atmospheric synths and gentle, relaxing rhythms to evoke a sense of a serene, neon-lit future. Ensure  the track is continuous with no background noise or interruptions, maintaining a calm and tranquil atmosphere throughout while adding a touch of retro-futuristic vibes.",
    "gentle lo-fi beat with a smooth, mellow piano melody in the background. Ensure there are no background noises or interruptions, maintaining a continuous and seamless flow throughout the track. The beat should be relaxing and tranquil, perfect for a calm and reflective atmosphere.",
    "Create an earthy lo-fi beat that evokes a natural, grounded atmosphere. Incorporate organic sounds like soft percussion, rustling leaves, and gentle acoustic instruments. The track should have a warm, soothing rhythm with a continuous flow and no background noise or interruptions, maintaining a calm and reflective ambiance throughout.",
    "Create a soothing lo-fi beat featuring gentle, melodic guitar riffs. The guitar should be the focal point, supported by subtle, ambient electronic elements and a smooth, relaxed rhythm. Ensure the track is continuous with no background noise or interruptions, maintaining a warm and mellow atmosphere throughout.",
    "Create an ambient lo-fi beat with a tranquil and ethereal atmosphere. Use soft, atmospheric pads, gentle melodies, and minimalistic percussion to evoke a sense of calm and serenity. Ensure the track is continuous with no background noise or interruptions, maintaining a soothing and immersive ambiance throughout.",
 ]
 def generate(offset=0):
-    wav = model.generate(descriptions)
+    wav = model.generate(PROMPTS)
    for idx, one_wav in enumerate(wav):
        # Will save under {idx}.wav, with loudness normalization at -14 db LUFS.
--- a/generate_manual.py
+++ b/generate_manual.py
@@ -3,6 +3,8 @@ import time
 from audiocraft.models.musicgen import MusicGen
 from audiocraft.data.audio import audio_write
 from prompts import PROMPTS
 MODEL_NAME = "facebook/musicgen-large"
 MUSIC_DURATION_SECONDS = 60
@@ -10,18 +12,11 @@ print("obtaining model...")
 model = MusicGen.get_pretrained(MODEL_NAME)
 model.set_generation_params(duration=MUSIC_DURATION_SECONDS)
 descriptions = [
    "Create a futuristic lo-fi beat that blends modern electronic elements with synthwave influences. Incorporate smooth, atmospheric synths and gentle, relaxing rhythms to evoke a sense of a serene, neon-lit future. Ensure  the track is continuous with no background noise or interruptions, maintaining a calm and tranquil atmosphere throughout while adding a touch of retro-futuristic vibes.",
    "gentle lo-fi beat with a smooth, mellow piano melody in the background. Ensure there are no background noises or interruptions, maintaining a continuous and seamless flow throughout the track. The beat should be relaxing and tranquil, perfect for a calm and reflective atmosphere.",
    "Create an earthy lo-fi beat that evokes a natural, grounded atmosphere. Incorporate organic sounds like soft percussion, rustling leaves, and gentle acoustic instruments. The track should have a warm, soothing rhythm with a continuous flow and no background noise or interruptions, maintaining a calm and reflective ambiance throughout.",
    "Create a soothing lo-fi beat featuring gentle, melodic guitar riffs. The guitar should be the focal point, supported by subtle, ambient electronic elements and a smooth, relaxed rhythm. Ensure the track is continuous with no background noise or interruptions, maintaining a warm and mellow atmosphere throughout.",
    "Create an ambient lo-fi beat with a tranquil and ethereal atmosphere. Use soft, atmospheric pads, gentle melodies, and minimalistic percussion to evoke a sense of calm and serenity. Ensure the track is continuous with no background noise or interruptions, maintaining a soothing and immersive ambiance throughout.",
 ]
 print("model obtained. generating audio...")
 a = time.time()
-wav = model.generate(descriptions)
+wav = model.generate(PROMPTS)
 b = time.time()
 print(f"audio generated. took {b - a} seconds.")
--- a/prompts.py
+++ b/prompts.py
@@ -0,0 +1,7 @@
 # Shared text prompts for MusicGen, imported by generate.py and
 # generate_manual.py and passed as a batch to model.generate().
 PROMPTS = [
    "Create a futuristic lo-fi beat that blends modern electronic elements with synthwave influences. Incorporate smooth, atmospheric synths and gentle, relaxing rhythms to evoke a sense of a serene, neon-lit future. Ensure  the track is continuous with no background noise or interruptions, maintaining a calm and tranquil atmosphere throughout while adding a touch of retro-futuristic vibes.",
    "gentle lo-fi beat with a smooth, mellow piano melody in the background. Ensure there are no background noises or interruptions, maintaining a continuous and seamless flow throughout the track. The beat should be relaxing and tranquil, perfect for a calm and reflective atmosphere.",
    "Create an earthy lo-fi beat that evokes a natural, grounded atmosphere. Incorporate organic sounds like soft percussion, rustling leaves, and gentle acoustic instruments. The track should have a warm, soothing rhythm with a continuous flow and no background noise or interruptions, maintaining a calm and reflective ambiance throughout.",
    "Create a soothing lo-fi beat featuring gentle, melodic guitar riffs. The guitar should be the focal point, supported by subtle, ambient electronic elements and a smooth, relaxed rhythm. Ensure the track is continuous with no background noise or interruptions, maintaining a warm and mellow atmosphere throughout.",
    "Create an ambient lo-fi beat with a tranquil and ethereal atmosphere. Use soft, atmospheric pads, gentle melodies, and minimalistic percussion to evoke a sense of calm and serenity. Ensure the track is continuous with no background noise or interruptions, maintaining a soothing and immersive ambiance throughout.",
 ]
--- a/server.py
+++ b/server.py
@@ -1,9 +1,10 @@
 import threading
 import os
 from time import sleep
 import requests
 import websocket
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, status
 from fastapi.responses import FileResponse
 from fastapi.staticfiles import StaticFiles
 from logger import log_info, log_warn
@@ -13,33 +14,29 @@ from websocket_connection_manager import WebSocketConnectionManager
 current_index = -1
 # the timer that periodically advances the current audio track
 t = None
-# websocket connection to the inference server
+inference_url = ""
 ws = None
 ws_url = ""
 ws_connection_manager = WebSocketConnectionManager()
 active_listeners = set()
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    global ws, ws_url
+    global ws, inference_url
-    ws_url = os.environ.get("INFERENCE_SERVER_WS_URL")
+    inference_url = os.environ.get("INFERENCE_SERVER_URL")
-    if not ws_url:
+    if not inference_url:
-        ws_url = "ws://localhost:8001"
+        inference_url = "http://localhost:8001"
    advance()
    yield
    if ws:
        ws.close()
    if t:
        t.cancel()
 def generate_new_audio():
-    if not ws_url:
+    if not inference_url:
        return
    global current_index
@@ -52,31 +49,50 @@ def generate_new_audio():
    else:
        return
-    log_info("generating new audio...")
+    log_info("requesting new audio...")
    try:
-        ws = websocket.create_connection(ws_url)
+        print(f"{inference_url}/generate")
-
+        requests.post(f"{inference_url}/generate")
        ws.send("generate")
        wavs = []
        for i in range(5):
            raw = ws.recv()
            if isinstance(raw, str):
                continue
            wavs.append(raw)
        for i, wav in enumerate(wavs):
            with open(f"{i + offset}.mp3", "wb") as f:
                f.write(wav)
        log_info("audio generated.")
        ws.close()
    except:
        log_warn(
            "inference server potentially unreachable. recycling cached audio for now."
        )
        return
    is_available = False
    while not is_available:
        try:
            res = requests.post(f"{inference_url}/clips/0", stream=True)
        except:
            log_warn(
                "inference server potentially unreachable. recycling cached audio for now."
            )
            return
        if res.status_code != status.HTTP_200_OK:
            print("still generating...")
            sleep(5)
            continue
        print("inference complete! downloading new clips")
        is_available = True
        with open(f"{offset}.mp3", "wb") as f:
            for chunk in res.iter_content(chunk_size=128):
                f.write(chunk)
    for i in range(4):
        res = requests.post(f"{inference_url}/clips/{i + 1}", stream=True)
        if res.status_code != status.HTTP_200_OK:
            continue
        with open(f"{i + 1 + offset}.mp3", "wb") as f:
            for chunk in res.iter_content(chunk_size=128):
                f.write(chunk)
    log_info("audio generated.")
 def advance():