rvc

Running

App Files Files Community

R-Kentaren committed on about 1 month ago

Commit

9633ddc

verified ·

1 Parent(s): 2442f1f

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -87

app.py CHANGED Viewed

@@ -20,6 +20,7 @@ from pedalboard.io import AudioFile
 from pydub import AudioSegment
 import noisereduce as nr
 import edge_tts
 # Suppress logging
 logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
@@ -30,13 +31,13 @@ converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
 # Theme & Title
 title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
 description = """
-<div style="text-align: center; font-size: 1.1em; color: #aaa;">
 This demo is for educational and research purposes only.<br>
 Misuse of voice conversion technology is unethical. Use responsibly.<br>
 Authors are not liable for inappropriate usage.
 </div>
 """
-theme = "Thatguy099/Sonix"
 # Global constants
 PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
@@ -47,35 +48,67 @@ OUTPUT_DIR = "output"
 os.makedirs(DOWNLOAD_DIR, exist_ok=True)
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # --- Utility Functions ---
 def find_files(directory, exts=(".pth", ".index", ".zip")):
-    return [os.path.join(directory, f) for f in os.listdir(directory)
-            if f.endswith(exts)]
 def unzip_in_folder(zip_path, extract_to):
     """Extract the files of a ZIP archive flat into `extract_to`.
     Directory structure inside the archive is discarded: each file is written
     directly under `extract_to` using only its base name, so a crafted member
     path such as "../x" cannot land outside the target directory.
     macOS archive metadata ("__MACOSX" folders and "._*" AppleDouble stubs) is
     skipped; once flattened, a "._model.pth" stub would sit next to the real
     model and carry the same extension.
     Args:
         zip_path: Path of the ZIP archive to read.
         extract_to: Directory that receives the extracted files.
     """
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         for member in zip_ref.infolist():
             if member.is_dir():
                 continue
             flat_name = os.path.basename(member.filename)
             if not flat_name or flat_name.startswith("._"):
                 continue
             if "__MACOSX" in member.filename.split("/"):
                 continue
             # Rewriting `filename` only changes the output name; the archive
             # lookup still uses the entry's original name.
             member.filename = flat_name
             zip_ref.extract(member, extract_to)
 def get_file_size(url):
     if "huggingface" not in url.lower():
         raise ValueError("❌ Only Hugging Face links are allowed.")
     try:
-        with urllib.request.urlopen(url) as response:
-            file_size = int(response.headers.get("Content-Length", 0))
         if file_size > MAX_FILE_SIZE:
             raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
         return file_size
     except Exception as e:
         raise RuntimeError(f"❌ Failed to fetch file info: {str(e)}")
-def clear_directory_later(directory, delay=15):
     """Clear temp directory after delay in a background thread."""
     def _clear():
         time.sleep(delay)
@@ -84,21 +117,19 @@ def clear_directory_later(directory, delay=15):
             print(f"🧹 Cleaned up: {directory}")
     threading.Thread(target=_clear, daemon=True).start()
 def find_model_and_index(directory):
     """Return the first ".pth" and first ".index" path found in `directory`.
     Candidates come from `find_files(directory)`; either element of the
     returned `(model, index)` pair is None when no matching file exists.
     """
     model_path = None
     index_path = None
     for candidate in find_files(directory):
         # Keep only the first hit of each kind, in listing order.
         if model_path is None and candidate.endswith(".pth"):
             model_path = candidate
         elif index_path is None and candidate.endswith(".index"):
             index_path = candidate
     return model_path, index_path
 # --- Model Download Handler ---
 @spaces.GPU(duration=60)
 def download_model(url_data):
     if not url_data.strip():
         raise ValueError("❌ No URL provided.")
-    urls = [u.strip().replace("/blob/", "/resolve/") for u in url_data.split(",") if u.strip()]
     if len(urls) > 2:
         raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")
@@ -111,8 +142,34 @@ def download_model(url_data):
     os.makedirs(directory, exist_ok=True)
     try:
         for url in urls:
-            download_manager(url=url, path=directory, extension="")
         # Unzip if needed
         for f in find_files(directory, (".zip",)):
@@ -137,7 +194,6 @@ def download_model(url_data):
         shutil.rmtree(directory, ignore_errors=True)
         raise gr.Error(f"❌ Download failed: {str(e)}")
 # --- Audio Processing ---
 def apply_noisereduce(audio_paths):
     results = []
@@ -162,7 +218,6 @@ def apply_noisereduce(audio_paths):
             results.append(path)
     return results
 def apply_audio_effects(audio_paths):
     results = []
     board = Pedalboard([
@@ -186,13 +241,11 @@ def apply_audio_effects(audio_paths):
             results.append(path)
     return results
 # --- TTS Handler ---
 async def generate_tts(text, voice, output_path):
     """Synthesize `text` with Edge TTS and save the audio to `output_path`.
     Args:
         text: Text to speak.
         voice: Voice label, either a bare short name ("en-US-JennyNeural") or
             the dropdown form "<ShortName>-<Gender>".
         output_path: Destination file for the generated audio.
     """
     # Short names contain hyphens themselves, so splitting on the first "-"
     # would reduce "en-US-JennyNeural-Female" to "en". Strip only a trailing
     # gender tag and leave everything else untouched.
     short_name, sep, suffix = voice.rpartition("-")
     if not sep or suffix not in ("Male", "Female"):
         short_name = voice
     communicate = edge_tts.Communicate(text, short_name)
     await communicate.save(output_path)
 def infer_tts(tts_voice, tts_text, play_tts):
     if not tts_text.strip():
         raise ValueError("❌ Text is empty.")
@@ -209,7 +262,6 @@ def infer_tts(tts_voice, tts_text, play_tts):
     except Exception as e:
         raise gr.Error(f"TTS generation failed: {str(e)}")
 # --- Main Conversion Function ---
 @spaces.GPU()
 def run_conversion(
@@ -258,7 +310,6 @@ def run_conversion(
     return results
 # --- Gradio UI Builder ---
 def create_ui():
     with gr.Blocks(theme=theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200)) as app:
@@ -268,68 +319,66 @@ def create_ui():
         with gr.Tabs():
             # ============= TAB 1: Voice Conversion =============
             with gr.Tab("🎤 Voice Conversion", id=0):
-                gr.Markdown("### 🔊 Upload audio and select your model.")
                 with gr.Row():
-                    with gr.Column(scale=2):
                         audio_input = gr.File(
-                            label="Upload Audio (WAV, MP3, OGG)",
                             file_count="multiple",
                             type="filepath"
                         )
                     with gr.Column(scale=1):
                         with gr.Group():
-                            model_file = gr.File(label="Upload .pth Model", type="filepath", height=100)
-                            index_file = gr.File(label="Upload .index File (Optional)", type="filepath", height=100)
-                # Download Model Section
-                gr.Markdown("📥 Or download model from URL:")
-                with gr.Row():
-                    use_url = gr.Checkbox(label="🌐 Use Model URL", value=False)
-                with gr.Group(visible=False) as url_group:
-                    gr.Markdown(
-                        "🔗 Paste Hugging Face link(s):<br>"
-                        "• Single ZIP: `https://hf.co/.../model.zip`<br>"
-                        "• Two links: `https://hf.co/.../model.pth, https://hf.co/.../model.index`"
-                    )
-                    model_url = gr.Textbox(
-                        placeholder="https://huggingface.co/...",
-                        label="Model URL(s)",
-                        lines=1
-                    )
-                    download_btn = gr.Button("⬇️ Download Model", variant="secondary")
-                download_btn.click(
-                    download_model,
-                    inputs=[model_url],
-                    outputs=[model_file, index_file]
-                )
-                use_url.change(
-                    lambda x: gr.update(visible=x),
-                    inputs=[use_url],
-                    outputs=[url_group]
-                )
-                # Parameters
-                gr.Markdown("### ⚙️ Conversion Settings")
-                with gr.Row():
-                    pitch_algo = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
-                    pitch_level = gr.Slider(-24, 24, value=0, step=1, label="Pitch Level")
-                with gr.Row():
-                    index_rate = gr.Slider(0, 1, value=0.75, label="Index Influence")
-                    filter_radius = gr.Slider(0, 7, value=3, step=1, label="Median Filter")
-                with gr.Row():
-                    rms_mix_rate = gr.Slider(0, 1, value=0.25, label="Volume Envelope")
-                    protect = gr.Slider(0, 0.5, value=0.5, label="Consonant Protection")
-                # Post-processing
-                with gr.Row():
-                    denoise = gr.Checkbox(False, label="🔇 Denoise Output")
-                    reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
-                # Run Button
-                convert_btn = gr.Button("🚀 Convert Voice", variant="primary")
-                output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
                 convert_btn.click(
                     run_conversion,
@@ -352,22 +401,28 @@ def create_ui():
             # ============= TAB 2: Text-to-Speech =============
             with gr.Tab("🗣️ Text-to-Speech", id=1):
                 gr.Markdown("### Convert text to speech using Edge TTS.")
-                tts_voice_list = sorted(
-                    [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
-                )
                 with gr.Row():
-                    with gr.Column():
                         tts_text = gr.Textbox(
-                            placeholder="Type your message here...",
                             label="Text Input",
-                            lines=4
                         )
-                        tts_voice = gr.Dropdown(tts_voice_list, value=tts_voice_list[0], label="Voice")
                         tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
                         tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
-                    with gr.Column():
-                        tts_output_audio = gr.File(label="Generated Audio", type="filepath")
                         tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
                 tts_btn.click(
@@ -380,7 +435,7 @@ def create_ui():
                     outputs=[tts_preview]
                 )
-        # Examples
         gr.Markdown("### 📚 Examples")
         gr.Examples(
             examples=[
@@ -398,7 +453,6 @@ def create_ui():
     return app
 # --- Launch App ---
 if __name__ == "__main__":
     app = create_ui()

 from pydub import AudioSegment
 import noisereduce as nr
 import edge_tts
+from huggingface_hub import hf_hub_download, HfApi # For robust HF link handling
 # Suppress logging
 logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
 # Theme & Title
 title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
 description = """
+<div style="text-align: center; font-size: 1.1em; color: #aaa; margin: 10px 0;">
 This demo is for educational and research purposes only.<br>
 Misuse of voice conversion technology is unethical. Use responsibly.<br>
 Authors are not liable for inappropriate usage.
 </div>
 """
+theme = "Thatguy099/Sonix" # Maintained as requested
 # Global constants
 PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
 os.makedirs(DOWNLOAD_DIR, exist_ok=True)
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # --- Utility Functions ---
 def find_files(directory, exts=(".pth", ".index", ".zip")):
     """List entries of `directory` whose name ends with one of `exts`.
     Args:
         directory: Directory to scan (not recursive).
         exts: A single suffix or any iterable of suffixes; matching ignores
             letter case.
     Returns:
         Paths joined onto `directory`, sorted by entry name so that callers
         picking "the first match" get the same answer on every run.
     """
     suffixes = (exts,) if isinstance(exts, str) else tuple(exts)
     suffixes = tuple(s.lower() for s in suffixes)
     return [
         os.path.join(directory, name)
         for name in sorted(os.listdir(directory))
         if name.lower().endswith(suffixes)
     ]
 def unzip_in_folder(zip_path, extract_to):
     """Extract the files of a ZIP archive flat into `extract_to`.
     Directory structure inside the archive is discarded: each file is written
     directly under `extract_to` using only its base name, so a crafted member
     path such as "../x" cannot land outside the target directory.
     macOS archive metadata ("__MACOSX" folders and "._*" AppleDouble stubs) is
     skipped; once flattened, a "._model.pth" stub would sit next to the real
     model and carry the same extension.
     Args:
         zip_path: Path of the ZIP archive to read.
         extract_to: Directory that receives the extracted files.
     """
     with zipfile.ZipFile(zip_path, 'r') as zip_ref:
         for member in zip_ref.infolist():
             if member.is_dir():
                 continue
             flat_name = os.path.basename(member.filename)
             if not flat_name or flat_name.startswith("._"):
                 continue
             if "__MACOSX" in member.filename.split("/"):
                 continue
             # Rewriting `filename` only changes the output name; the archive
             # lookup still uses the entry's original name.
             member.filename = flat_name
             zip_ref.extract(member, extract_to)
 def get_file_size(url):
     """Return the size in bytes of the file a Hugging Face model URL points to.
     The URL path must look like ``<owner>/<repo>/resolve/<revision>/<file path>``
     (``blob`` is accepted in place of ``resolve``). The size is read from the
     repository's file listing for that revision rather than by downloading.
     Args:
         url: Link to a single file inside a Hugging Face model repository.
     Returns:
         The file size in bytes.
     Raises:
         ValueError: If ``url`` does not contain "huggingface".
         RuntimeError: If the URL cannot be parsed, the lookup fails, the file
             is missing or has no recorded size, or it exceeds MAX_FILE_SIZE.
     """
     if "huggingface" not in url.lower():
         raise ValueError("❌ Only Hugging Face links are allowed.")
     try:
         from urllib.parse import unquote, urlparse
         # Work on the path only, so query strings ("?download=true") and
         # percent-encoding never leak into the repo id or the filename.
         segments = [unquote(s) for s in urlparse(url).path.split("/") if s]
         if len(segments) < 5 or segments[2] not in ("resolve", "blob"):
             raise ValueError("❌ Unable to parse Hugging Face URL.")
         repo_id = "/".join(segments[:2])
         revision = segments[3]
         filename = "/".join(segments[4:])
         # File sizes are only populated when files_metadata=True is requested.
         repo = HfApi().repo_info(
             repo_id=repo_id,
             revision=revision,
             repo_type="model",
             files_metadata=True,
         )
         file_entry = next((f for f in (repo.siblings or []) if f.rfilename == filename), None)
         if file_entry is None:
             raise ValueError(f"❌ File '{filename}' not found in repository '{repo_id}'.")
         file_size = file_entry.size
         if file_size is None:
             raise ValueError(f"❌ Size of '{filename}' is unavailable.")
         if file_size > MAX_FILE_SIZE:
             raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
         return file_size
     except Exception as e:
         raise RuntimeError(f"❌ Failed to fetch file info: {str(e)}") from e
+def clear_directory_later(directory, delay=30):
     """Clear temp directory after delay in a background thread."""
     def _clear():
         time.sleep(delay)
             print(f"🧹 Cleaned up: {directory}")
     threading.Thread(target=_clear, daemon=True).start()
 def find_model_and_index(directory):
     """Return the first ".pth" and first ".index" path found in `directory`.
     Candidates come from `find_files(directory)`; either element of the
     returned `(model, index)` pair is None when no matching file exists.
     """
     model_path = None
     index_path = None
     for candidate in find_files(directory):
         # Keep only the first hit of each kind, in listing order.
         if model_path is None and candidate.endswith(".pth"):
             model_path = candidate
         elif index_path is None and candidate.endswith(".index"):
             index_path = candidate
     return model_path, index_path
 # --- Model Download Handler ---
 @spaces.GPU(duration=60)
 def download_model(url_data):
     if not url_data.strip():
         raise ValueError("❌ No URL provided.")
+    urls = [u.strip() for u in url_data.split(",") if u.strip()]
     if len(urls) > 2:
         raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")
     os.makedirs(directory, exist_ok=True)
     try:
+        downloaded_files = []
         for url in urls:
+            # Use the robust Hugging Face Hub library for download
+            # This handles resolve/main, blob, and other link types seamlessly
+            parsed_url = urllib.parse.urlparse(url)
+            path_parts = parsed_url.path.strip("/").split("/")
+            if len(path_parts) < 4:
+                raise ValueError("❌ Invalid Hugging Face URL structure.")
+            repo_id = f"{path_parts[0]}/{path_parts[1]}"
+            revision = "main"
+            if "resolve" in path_parts:
+                resolve_idx = path_parts.index("resolve")
+                if resolve_idx + 1 < len(path_parts):
+                    revision = path_parts[resolve_idx + 1]
+                filename = "/".join(path_parts[resolve_idx + 2:])
+            else:
+                # Assume it's a blob link pointing to a file
+                filename = path_parts[-1]
+            # Download the file
+            local_path = hf_hub_download(
+                repo_id=repo_id,
+                filename=filename,
+                revision=revision,
+                cache_dir=directory,
+                local_dir=directory,
+                local_dir_use_symlinks=False
+            )
+            downloaded_files.append(local_path)
         # Unzip if needed
         for f in find_files(directory, (".zip",)):
         shutil.rmtree(directory, ignore_errors=True)
         raise gr.Error(f"❌ Download failed: {str(e)}")
 # --- Audio Processing ---
 def apply_noisereduce(audio_paths):
     results = []
             results.append(path)
     return results
 def apply_audio_effects(audio_paths):
     results = []
     board = Pedalboard([
             results.append(path)
     return results
 # --- TTS Handler ---
 async def generate_tts(text, voice, output_path):
     """Synthesize `text` with Edge TTS and save the audio to `output_path`.
     Args:
         text: Text to speak.
         voice: Voice label, either a bare short name ("en-US-JennyNeural") or
             the dropdown form "<ShortName>-<Gender>".
         output_path: Destination file for the generated audio.
     """
     # Short names contain hyphens themselves, so splitting on the first "-"
     # would reduce "en-US-JennyNeural-Female" to "en". Strip only a trailing
     # gender tag and leave everything else untouched.
     short_name, sep, suffix = voice.rpartition("-")
     if not sep or suffix not in ("Male", "Female"):
         short_name = voice
     communicate = edge_tts.Communicate(text, short_name)
     await communicate.save(output_path)
 def infer_tts(tts_voice, tts_text, play_tts):
     if not tts_text.strip():
         raise ValueError("❌ Text is empty.")
     except Exception as e:
         raise gr.Error(f"TTS generation failed: {str(e)}")
 # --- Main Conversion Function ---
 @spaces.GPU()
 def run_conversion(
     return results
 # --- Gradio UI Builder ---
 def create_ui():
     with gr.Blocks(theme=theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200)) as app:
         with gr.Tabs():
             # ============= TAB 1: Voice Conversion =============
             with gr.Tab("🎤 Voice Conversion", id=0):
                 with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("### 🔊 Upload Audio")
                         audio_input = gr.File(
+                            label="Audio Files (WAV, MP3, OGG)",
                             file_count="multiple",
                             type="filepath"
                         )
+                        gr.Markdown("### 📥 Load Model")
+                        model_file = gr.File(label="Upload .pth Model", type="filepath", file_count=1)
+                        index_file = gr.File(label="Upload .index File (Optional)", type="filepath", file_count=1)
+                        use_url = gr.Checkbox(label="🌐 Download from Hugging Face URL", value=False)
+                        with gr.Group(visible=False) as url_group:
+                            gr.Markdown(
+                                "🔗 Paste Hugging Face link(s):<br>"
+                                "• Direct ZIP: `https://hf.co/user/repo/resolve/main/model.zip`<br>"
+                                "• Separate files: `https://hf.co/user/repo/resolve/main/model.pth, https://hf.co/user/repo/resolve/main/model.index`"
+                            )
+                            model_url = gr.Textbox(
+                                placeholder="https://huggingface.co/user/repo/resolve/main/file.pth",
+                                label="Model URL(s)",
+                                lines=2
+                            )
+                            download_btn = gr.Button("⬇️ Download Model", variant="secondary")
+                        use_url.change(
+                            lambda x: gr.update(visible=x),
+                            inputs=[use_url],
+                            outputs=[url_group]
+                        )
+                        download_btn.click(
+                            download_model,
+                            inputs=[model_url],
+                            outputs=[model_file, index_file]
+                        ).then(
+                            lambda: gr.update(visible=False), # Hide URL group after download
+                            outputs=[url_group]
+                        )
                     with gr.Column(scale=1):
+                        gr.Markdown("### ⚙️ Conversion Settings")
                         with gr.Group():
+                            pitch_algo = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
+                            pitch_level = gr.Slider(-24, 24, value=0, step=1, label="Pitch Level")
+                            index_rate = gr.Slider(0, 1, value=0.75, label="Index Influence")
+                            filter_radius = gr.Slider(0, 7, value=3, step=1, label="Median Filter")
+                            rms_mix_rate = gr.Slider(0, 1, value=0.25, label="Volume Envelope")
+                            protect = gr.Slider(0, 0.5, value=0.5, label="Consonant Protection")
+                            denoise = gr.Checkbox(False, label="🔇 Denoise Output")
+                            reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
+                        convert_btn = gr.Button("🚀 Convert Voice", variant="primary", size="lg")
+                        output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
                 convert_btn.click(
                     run_conversion,
             # ============= TAB 2: Text-to-Speech =============
             with gr.Tab("🗣️ Text-to-Speech", id=1):
                 gr.Markdown("### Convert text to speech using Edge TTS.")
+                # Pre-fetch voices outside the main event loop if possible, or handle async properly
+                try:
+                    tts_voice_list = sorted(
+                        [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
+                    )
+                except:
+                    tts_voice_list = ["en-US-JennyNeural-Female"] # Fallback
                 with gr.Row():
+                    with gr.Column(scale=1):
                         tts_text = gr.Textbox(
+                            placeholder="Enter your text here...",
                             label="Text Input",
+                            lines=5
                         )
+                        tts_voice = gr.Dropdown(tts_voice_list, value=tts_voice_list[0] if tts_voice_list else None, label="Voice")
                         tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
                         tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
+                    with gr.Column(scale=1):
+                        tts_output_audio = gr.File(label="Download Audio", type="filepath")
                         tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
                 tts_btn.click(
                     outputs=[tts_preview]
                 )
+        # Examples (Moved to be more prominent)
         gr.Markdown("### 📚 Examples")
         gr.Examples(
             examples=[
     return app
 # --- Launch App ---
 if __name__ == "__main__":
     app = create_ui()