R-Kentaren committed on
Commit
9633ddc
·
verified ·
1 Parent(s): 2442f1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -87
app.py CHANGED
@@ -20,6 +20,7 @@ from pedalboard.io import AudioFile
20
  from pydub import AudioSegment
21
  import noisereduce as nr
22
  import edge_tts
 
23
 
24
  # Suppress logging
25
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
@@ -30,13 +31,13 @@ converter = BaseLoader(only_cpu=False, hubert_path=None, rmvpe_path=None)
30
  # Theme & Title
31
  title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
32
  description = """
33
- <div style="text-align: center; font-size: 1.1em; color: #aaa;">
34
  This demo is for educational and research purposes only.<br>
35
  Misuse of voice conversion technology is unethical. Use responsibly.<br>
36
  Authors are not liable for inappropriate usage.
37
  </div>
38
  """
39
- theme = "Thatguy099/Sonix"
40
 
41
  # Global constants
42
  PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
@@ -47,35 +48,67 @@ OUTPUT_DIR = "output"
47
  os.makedirs(DOWNLOAD_DIR, exist_ok=True)
48
  os.makedirs(OUTPUT_DIR, exist_ok=True)
49
 
50
-
51
  # --- Utility Functions ---
52
  def find_files(directory, exts=(".pth", ".index", ".zip")):
53
- return [os.path.join(directory, f) for f in os.listdir(directory)
54
- if f.endswith(exts)]
55
-
56
 
57
  def unzip_in_folder(zip_path, extract_to):
58
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
59
  for member in zip_ref.infolist():
60
  if not member.is_dir():
 
61
  member.filename = os.path.basename(member.filename)
62
  zip_ref.extract(member, extract_to)
63
 
64
-
65
  def get_file_size(url):
 
66
  if "huggingface" not in url.lower():
67
  raise ValueError("❌ Only Hugging Face links are allowed.")
68
  try:
69
- with urllib.request.urlopen(url) as response:
70
- file_size = int(response.headers.get("Content-Length", 0))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  if file_size > MAX_FILE_SIZE:
72
  raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
73
  return file_size
74
  except Exception as e:
75
  raise RuntimeError(f"❌ Failed to fetch file info: {str(e)}")
76
 
77
-
78
- def clear_directory_later(directory, delay=15):
79
  """Clear temp directory after delay in a background thread."""
80
  def _clear():
81
  time.sleep(delay)
@@ -84,21 +117,19 @@ def clear_directory_later(directory, delay=15):
84
  print(f"🧹 Cleaned up: {directory}")
85
  threading.Thread(target=_clear, daemon=True).start()
86
 
87
-
88
  def find_model_and_index(directory):
89
  files = find_files(directory)
90
  model = next((f for f in files if f.endswith(".pth")), None)
91
  index = next((f for f in files if f.endswith(".index")), None)
92
  return model, index
93
 
94
-
95
  # --- Model Download Handler ---
96
  @spaces.GPU(duration=60)
97
  def download_model(url_data):
98
  if not url_data.strip():
99
  raise ValueError("❌ No URL provided.")
100
 
101
- urls = [u.strip().replace("/blob/", "/resolve/") for u in url_data.split(",") if u.strip()]
102
  if len(urls) > 2:
103
  raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")
104
 
@@ -111,8 +142,34 @@ def download_model(url_data):
111
  os.makedirs(directory, exist_ok=True)
112
 
113
  try:
 
114
  for url in urls:
115
- download_manager(url=url, path=directory, extension="")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  # Unzip if needed
118
  for f in find_files(directory, (".zip",)):
@@ -137,7 +194,6 @@ def download_model(url_data):
137
  shutil.rmtree(directory, ignore_errors=True)
138
  raise gr.Error(f"❌ Download failed: {str(e)}")
139
 
140
-
141
  # --- Audio Processing ---
142
  def apply_noisereduce(audio_paths):
143
  results = []
@@ -162,7 +218,6 @@ def apply_noisereduce(audio_paths):
162
  results.append(path)
163
  return results
164
 
165
-
166
  def apply_audio_effects(audio_paths):
167
  results = []
168
  board = Pedalboard([
@@ -186,13 +241,11 @@ def apply_audio_effects(audio_paths):
186
  results.append(path)
187
  return results
188
 
189
-
190
  # --- TTS Handler ---
191
  async def generate_tts(text, voice, output_path):
192
  communicate = edge_tts.Communicate(text, voice.split("-")[0])
193
  await communicate.save(output_path)
194
 
195
-
196
  def infer_tts(tts_voice, tts_text, play_tts):
197
  if not tts_text.strip():
198
  raise ValueError("❌ Text is empty.")
@@ -209,7 +262,6 @@ def infer_tts(tts_voice, tts_text, play_tts):
209
  except Exception as e:
210
  raise gr.Error(f"TTS generation failed: {str(e)}")
211
 
212
-
213
  # --- Main Conversion Function ---
214
  @spaces.GPU()
215
  def run_conversion(
@@ -258,7 +310,6 @@ def run_conversion(
258
 
259
  return results
260
 
261
-
262
  # --- Gradio UI Builder ---
263
  def create_ui():
264
  with gr.Blocks(theme=theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200)) as app:
@@ -268,68 +319,66 @@ def create_ui():
268
  with gr.Tabs():
269
  # ============= TAB 1: Voice Conversion =============
270
  with gr.Tab("🎤 Voice Conversion", id=0):
271
- gr.Markdown("### 🔊 Upload audio and select your model.")
272
-
273
  with gr.Row():
274
- with gr.Column(scale=2):
 
275
  audio_input = gr.File(
276
- label="Upload Audio (WAV, MP3, OGG)",
277
  file_count="multiple",
278
  type="filepath"
279
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  with gr.Column(scale=1):
 
281
  with gr.Group():
282
- model_file = gr.File(label="Upload .pth Model", type="filepath", height=100)
283
- index_file = gr.File(label="Upload .index File (Optional)", type="filepath", height=100)
284
-
285
- # Download Model Section
286
- gr.Markdown("📥 Or download model from URL:")
287
- with gr.Row():
288
- use_url = gr.Checkbox(label="🌐 Use Model URL", value=False)
289
- with gr.Group(visible=False) as url_group:
290
- gr.Markdown(
291
- "🔗 Paste Hugging Face link(s):<br>"
292
- "• Single ZIP: `https://hf.co/.../model.zip`<br>"
293
- "• Two links: `https://hf.co/.../model.pth, https://hf.co/.../model.index`"
294
- )
295
- model_url = gr.Textbox(
296
- placeholder="https://huggingface.co/...",
297
- label="Model URL(s)",
298
- lines=1
299
- )
300
- download_btn = gr.Button("⬇️ Download Model", variant="secondary")
301
-
302
- download_btn.click(
303
- download_model,
304
- inputs=[model_url],
305
- outputs=[model_file, index_file]
306
- )
307
- use_url.change(
308
- lambda x: gr.update(visible=x),
309
- inputs=[use_url],
310
- outputs=[url_group]
311
- )
312
-
313
- # Parameters
314
- gr.Markdown("### ⚙️ Conversion Settings")
315
- with gr.Row():
316
- pitch_algo = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
317
- pitch_level = gr.Slider(-24, 24, value=0, step=1, label="Pitch Level")
318
- with gr.Row():
319
- index_rate = gr.Slider(0, 1, value=0.75, label="Index Influence")
320
- filter_radius = gr.Slider(0, 7, value=3, step=1, label="Median Filter")
321
- with gr.Row():
322
- rms_mix_rate = gr.Slider(0, 1, value=0.25, label="Volume Envelope")
323
- protect = gr.Slider(0, 0.5, value=0.5, label="Consonant Protection")
324
-
325
- # Post-processing
326
- with gr.Row():
327
- denoise = gr.Checkbox(False, label="🔇 Denoise Output")
328
- reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
329
-
330
- # Run Button
331
- convert_btn = gr.Button("🚀 Convert Voice", variant="primary")
332
- output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
333
 
334
  convert_btn.click(
335
  run_conversion,
@@ -352,22 +401,28 @@ def create_ui():
352
  # ============= TAB 2: Text-to-Speech =============
353
  with gr.Tab("🗣️ Text-to-Speech", id=1):
354
  gr.Markdown("### Convert text to speech using Edge TTS.")
 
 
 
 
 
 
 
 
355
 
356
- tts_voice_list = sorted(
357
- [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
358
- )
359
  with gr.Row():
360
- with gr.Column():
361
  tts_text = gr.Textbox(
362
- placeholder="Type your message here...",
363
  label="Text Input",
364
- lines=4
365
  )
366
- tts_voice = gr.Dropdown(tts_voice_list, value=tts_voice_list[0], label="Voice")
367
  tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
368
  tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
369
- with gr.Column():
370
- tts_output_audio = gr.File(label="Generated Audio", type="filepath")
 
371
  tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
372
 
373
  tts_btn.click(
@@ -380,7 +435,7 @@ def create_ui():
380
  outputs=[tts_preview]
381
  )
382
 
383
- # Examples
384
  gr.Markdown("### 📚 Examples")
385
  gr.Examples(
386
  examples=[
@@ -398,7 +453,6 @@ def create_ui():
398
 
399
  return app
400
 
401
-
402
  # --- Launch App ---
403
  if __name__ == "__main__":
404
  app = create_ui()
 
20
  from pydub import AudioSegment
21
  import noisereduce as nr
22
  import edge_tts
23
+ from huggingface_hub import hf_hub_download, HfApi # For robust HF link handling
24
 
25
  # Suppress logging
26
  logging.getLogger("infer_rvc_python").setLevel(logging.ERROR)
 
31
  # Theme & Title
32
  title = "<center><strong><font size='7'>🔊 RVC+</font></strong></center>"
33
  description = """
34
+ <div style="text-align: center; font-size: 1.1em; color: #aaa; margin: 10px 0;">
35
  This demo is for educational and research purposes only.<br>
36
  Misuse of voice conversion technology is unethical. Use responsibly.<br>
37
  Authors are not liable for inappropriate usage.
38
  </div>
39
  """
40
+ theme = "Thatguy099/Sonix" # Maintained as requested
41
 
42
  # Global constants
43
  PITCH_ALGO_OPT = ["pm", "harvest", "crepe", "rmvpe", "rmvpe+"]
 
48
  os.makedirs(DOWNLOAD_DIR, exist_ok=True)
49
  os.makedirs(OUTPUT_DIR, exist_ok=True)
50
 
 
51
  # --- Utility Functions ---
52
  def find_files(directory, exts=(".pth", ".index", ".zip")):
53
+ return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(exts)]
 
 
54
 
55
  def unzip_in_folder(zip_path, extract_to):
56
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
57
  for member in zip_ref.infolist():
58
  if not member.is_dir():
59
+ # Preserve filename, avoid path traversal
60
  member.filename = os.path.basename(member.filename)
61
  zip_ref.extract(member, extract_to)
62
 
 
63
  def get_file_size(url):
64
+ """Check file size for Hugging Face URLs, supporting resolve/main links."""
65
  if "huggingface" not in url.lower():
66
  raise ValueError("❌ Only Hugging Face links are allowed.")
67
  try:
68
+ # The Hugging Face Hub API can handle various link types
69
+ api = HfApi()
70
+ # Extract repo_id and filename from the URL
71
+ if "/resolve/main/" in url:
72
+ parts = url.split("/resolve/main/")
73
+ elif "/resolve/" in url:
74
+ # Handle specific branches
75
+ parts = url.split("/resolve/")
76
+ parts[1] = parts[1].split("/", 1)[1] # Remove branch name
77
+ else:
78
+ # Assume it's a blob link or direct file link
79
+ parts = url.rstrip("/").rsplit("/", 2)
80
+ if len(parts) == 3:
81
+ repo_parts = "/".join(parts[0].split("/")[-2:])
82
+ filename = parts[2]
83
+ repo_id = f"{parts[0].split('/')[-2]}/{parts[0].split('/')[-1]}"
84
+ file_info = api.repo_info(repo_id=repo_id, repo_type="model")
85
+ file_entry = next((f for f in file_info.siblings if f.rfilename == filename), None)
86
+ if not file_entry:
87
+ raise ValueError(f"❌ File '{filename}' not found in repository '{repo_id}'.")
88
+ file_size = file_entry.size
89
+ if file_size > MAX_FILE_SIZE:
90
+ raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
91
+ return file_size
92
+ else:
93
+ raise ValueError("❌ Unable to parse Hugging Face URL.")
94
+
95
+ repo_parts = parts[0].split("/")[-2:]
96
+ repo_id = f"{repo_parts[0]}/{repo_parts[1]}"
97
+ filename = parts[1]
98
+
99
+ file_info = api.repo_info(repo_id=repo_id, repo_type="model")
100
+ file_entry = next((f for f in file_info.siblings if f.rfilename == filename), None)
101
+ if not file_entry:
102
+ raise ValueError(f"❌ File '{filename}' not found in repository '{repo_id}'.")
103
+
104
+ file_size = file_entry.size
105
  if file_size > MAX_FILE_SIZE:
106
  raise ValueError(f"⚠️ File too large: {file_size / 1e6:.1f} MB (>500MB)")
107
  return file_size
108
  except Exception as e:
109
  raise RuntimeError(f"❌ Failed to fetch file info: {str(e)}")
110
 
111
+ def clear_directory_later(directory, delay=30):
 
112
  """Clear temp directory after delay in a background thread."""
113
  def _clear():
114
  time.sleep(delay)
 
117
  print(f"🧹 Cleaned up: {directory}")
118
  threading.Thread(target=_clear, daemon=True).start()
119
 
 
120
  def find_model_and_index(directory):
121
  files = find_files(directory)
122
  model = next((f for f in files if f.endswith(".pth")), None)
123
  index = next((f for f in files if f.endswith(".index")), None)
124
  return model, index
125
 
 
126
  # --- Model Download Handler ---
127
  @spaces.GPU(duration=60)
128
  def download_model(url_data):
129
  if not url_data.strip():
130
  raise ValueError("❌ No URL provided.")
131
 
132
+ urls = [u.strip() for u in url_data.split(",") if u.strip()]
133
  if len(urls) > 2:
134
  raise ValueError("❌ Provide up to two URLs (model.pth, index.index).")
135
 
 
142
  os.makedirs(directory, exist_ok=True)
143
 
144
  try:
145
+ downloaded_files = []
146
  for url in urls:
147
+ # Use the robust Hugging Face Hub library for download
148
+ # This handles resolve/main, blob, and other link types seamlessly
149
+ parsed_url = urllib.parse.urlparse(url)
150
+ path_parts = parsed_url.path.strip("/").split("/")
151
+ if len(path_parts) < 4:
152
+ raise ValueError("❌ Invalid Hugging Face URL structure.")
153
+ repo_id = f"{path_parts[0]}/{path_parts[1]}"
154
+ revision = "main"
155
+ if "resolve" in path_parts:
156
+ resolve_idx = path_parts.index("resolve")
157
+ if resolve_idx + 1 < len(path_parts):
158
+ revision = path_parts[resolve_idx + 1]
159
+ filename = "/".join(path_parts[resolve_idx + 2:])
160
+ else:
161
+ # Assume it's a blob link pointing to a file
162
+ filename = path_parts[-1]
163
+ # Download the file
164
+ local_path = hf_hub_download(
165
+ repo_id=repo_id,
166
+ filename=filename,
167
+ revision=revision,
168
+ cache_dir=directory,
169
+ local_dir=directory,
170
+ local_dir_use_symlinks=False
171
+ )
172
+ downloaded_files.append(local_path)
173
 
174
  # Unzip if needed
175
  for f in find_files(directory, (".zip",)):
 
194
  shutil.rmtree(directory, ignore_errors=True)
195
  raise gr.Error(f"❌ Download failed: {str(e)}")
196
 
 
197
  # --- Audio Processing ---
198
  def apply_noisereduce(audio_paths):
199
  results = []
 
218
  results.append(path)
219
  return results
220
 
 
221
  def apply_audio_effects(audio_paths):
222
  results = []
223
  board = Pedalboard([
 
241
  results.append(path)
242
  return results
243
 
 
244
  # --- TTS Handler ---
245
  async def generate_tts(text, voice, output_path):
246
  communicate = edge_tts.Communicate(text, voice.split("-")[0])
247
  await communicate.save(output_path)
248
 
 
249
  def infer_tts(tts_voice, tts_text, play_tts):
250
  if not tts_text.strip():
251
  raise ValueError("❌ Text is empty.")
 
262
  except Exception as e:
263
  raise gr.Error(f"TTS generation failed: {str(e)}")
264
 
 
265
  # --- Main Conversion Function ---
266
  @spaces.GPU()
267
  def run_conversion(
 
310
 
311
  return results
312
 
 
313
  # --- Gradio UI Builder ---
314
  def create_ui():
315
  with gr.Blocks(theme=theme, title="RVC+", fill_width=True, delete_cache=(3200, 3200)) as app:
 
319
  with gr.Tabs():
320
  # ============= TAB 1: Voice Conversion =============
321
  with gr.Tab("🎤 Voice Conversion", id=0):
 
 
322
  with gr.Row():
323
+ with gr.Column(scale=1):
324
+ gr.Markdown("### 🔊 Upload Audio")
325
  audio_input = gr.File(
326
+ label="Audio Files (WAV, MP3, OGG)",
327
  file_count="multiple",
328
  type="filepath"
329
  )
330
+
331
+ gr.Markdown("### 📥 Load Model")
332
+ model_file = gr.File(label="Upload .pth Model", type="filepath", file_count=1)
333
+ index_file = gr.File(label="Upload .index File (Optional)", type="filepath", file_count=1)
334
+
335
+ use_url = gr.Checkbox(label="🌐 Download from Hugging Face URL", value=False)
336
+
337
+ with gr.Group(visible=False) as url_group:
338
+ gr.Markdown(
339
+ "🔗 Paste Hugging Face link(s):<br>"
340
+ "• Direct ZIP: `https://hf.co/user/repo/resolve/main/model.zip`<br>"
341
+ "• Separate files: `https://hf.co/user/repo/resolve/main/model.pth, https://hf.co/user/repo/resolve/main/model.index`"
342
+ )
343
+ model_url = gr.Textbox(
344
+ placeholder="https://huggingface.co/user/repo/resolve/main/file.pth",
345
+ label="Model URL(s)",
346
+ lines=2
347
+ )
348
+ download_btn = gr.Button("⬇️ Download Model", variant="secondary")
349
+
350
+ use_url.change(
351
+ lambda x: gr.update(visible=x),
352
+ inputs=[use_url],
353
+ outputs=[url_group]
354
+ )
355
+
356
+ download_btn.click(
357
+ download_model,
358
+ inputs=[model_url],
359
+ outputs=[model_file, index_file]
360
+ ).then(
361
+ lambda: gr.update(visible=False), # Hide URL group after download
362
+ outputs=[url_group]
363
+ )
364
+
365
  with gr.Column(scale=1):
366
+ gr.Markdown("### ⚙️ Conversion Settings")
367
  with gr.Group():
368
+ pitch_algo = gr.Dropdown(PITCH_ALGO_OPT, value="rmvpe+", label="Pitch Algorithm")
369
+ pitch_level = gr.Slider(-24, 24, value=0, step=1, label="Pitch Level")
370
+
371
+ index_rate = gr.Slider(0, 1, value=0.75, label="Index Influence")
372
+ filter_radius = gr.Slider(0, 7, value=3, step=1, label="Median Filter")
373
+
374
+ rms_mix_rate = gr.Slider(0, 1, value=0.25, label="Volume Envelope")
375
+ protect = gr.Slider(0, 0.5, value=0.5, label="Consonant Protection")
376
+
377
+ denoise = gr.Checkbox(False, label="🔇 Denoise Output")
378
+ reverb = gr.Checkbox(False, label="🎛️ Add Reverb")
379
+
380
+ convert_btn = gr.Button("🚀 Convert Voice", variant="primary", size="lg")
381
+ output_files = gr.File(label="✅ Converted Audio", file_count="multiple")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
 
383
  convert_btn.click(
384
  run_conversion,
 
401
  # ============= TAB 2: Text-to-Speech =============
402
  with gr.Tab("🗣️ Text-to-Speech", id=1):
403
  gr.Markdown("### Convert text to speech using Edge TTS.")
404
+
405
+ # Pre-fetch voices outside the main event loop if possible, or handle async properly
406
+ try:
407
+ tts_voice_list = sorted(
408
+ [f"{v['ShortName']}-{v['Gender']}" for v in asyncio.run(edge_tts.list_voices())]
409
+ )
410
+ except:
411
+ tts_voice_list = ["en-US-JennyNeural-Female"] # Fallback
412
 
 
 
 
413
  with gr.Row():
414
+ with gr.Column(scale=1):
415
  tts_text = gr.Textbox(
416
+ placeholder="Enter your text here...",
417
  label="Text Input",
418
+ lines=5
419
  )
420
+ tts_voice = gr.Dropdown(tts_voice_list, value=tts_voice_list[0] if tts_voice_list else None, label="Voice")
421
  tts_play = gr.Checkbox(False, label="🎧 Auto-play audio")
422
  tts_btn = gr.Button("🔊 Generate Speech", variant="secondary")
423
+
424
+ with gr.Column(scale=1):
425
+ tts_output_audio = gr.File(label="Download Audio", type="filepath")
426
  tts_preview = gr.Audio(label="Preview", visible=False, autoplay=True)
427
 
428
  tts_btn.click(
 
435
  outputs=[tts_preview]
436
  )
437
 
438
+ # Examples (Moved to be more prominent)
439
  gr.Markdown("### 📚 Examples")
440
  gr.Examples(
441
  examples=[
 
453
 
454
  return app
455
 
 
456
  # --- Launch App ---
457
  if __name__ == "__main__":
458
  app = create_ui()