ACloudCenter committed on
Commit
009c9f3
·
1 Parent(s): 50a3943

Add API endpoint and fix deprecation warnings

Browse files

- Fixed torchaudio deprecation warning by removing torio import
- Added API endpoint for external calls at /generate
- Created example code for calling from other spaces
- Added VibeVoice integration helper class
- Made music generation accessible via Gradio Client API

Files changed (4) hide show
  1. api_usage_example.py +170 -0
  2. app.py +72 -0
  3. pipeline_ace_step.py +3 -2
  4. vibevoice_integration.py +250 -0
api_usage_example.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Example code for calling ACE-Music-Generator from another Hugging Face Space
3
+
4
+ This shows how to use the ACE-Music-Generator API from your podcast space
5
+ or any other Python application.
6
+ """
7
+
8
+ from gradio_client import Client
9
+ import tempfile
10
+ import requests
11
+
12
# Method 1: Using Gradio Client (Recommended for Spaces)
def generate_music_from_space(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_name="ACloudCenter/ACE-Music-Generator"
):
    """Generate music through the ACE-Music-Generator space's Gradio API.

    Args:
        duration: Clip length in seconds.
        tags: Comma-separated music style tags.
        lyrics: Lyrics text, or "[instrumental]" for no vocals.
        space_name: Hugging Face space identifier to connect to.

    Returns:
        Path to the downloaded audio file, or None on any failure.
    """
    try:
        # Connect to the space and invoke its /generate endpoint.
        space = Client(space_name)
        return space.predict(
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
            api_name="/generate",
        )
    except Exception as e:
        print(f"Error generating music: {e}")
        return None
50
+
51
+
52
# Method 2: Direct HTTP API call
def generate_music_http(
    duration=20,
    tags="edm, synth, bass, 128 bpm, energetic",
    lyrics="[instrumental]",
    space_url="https://acloudcenter-ace-music-generator.hf.space"
):
    """
    Generate music using a direct HTTP API call.

    Args:
        duration: Duration in seconds
        tags: Music style tags
        lyrics: Lyrics or [instrumental]
        space_url: Your space URL

    Returns:
        audio_file_path: Path to downloaded audio file, or None on failure
    """
    api_url = f"{space_url}/run/generate"

    payload = {
        "data": [
            duration,
            tags,
            lyrics,
            60,    # infer_steps
            15.0,  # guidance_scale
        ]
    }

    try:
        # Generation is slow; a generous timeout still prevents the caller
        # from hanging forever if the space is stuck (previously no timeout).
        response = requests.post(api_url, json=payload, timeout=600)

        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            return None

        result = response.json()
        # Download the audio file referenced in the response.
        audio_url = result["data"][0]["url"]
        audio_response = requests.get(audio_url, timeout=600)
        # Fail loudly (caught below) instead of writing an HTML error page
        # into the .mp3 file.
        audio_response.raise_for_status()

        # Save to a temp file and hand the path back to the caller.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            f.write(audio_response.content)
            return f.name

    except Exception as e:
        print(f"Error generating music: {e}")
        return None
106
+
107
+
108
# Example usage in your podcast generator
def add_background_music_to_podcast():
    """Example of how to use the generator in a podcast space.

    Returns:
        Path to the generated clip, or None when generation failed.
    """
    # Request a short, calm EDM bed suitable for background use.
    music_path = generate_music_from_space(
        duration=20,
        tags="edm, ambient, soft, background, 100 bpm, calm",
        lyrics="[instrumental]"
    )

    # Guard clause: bail out early when nothing came back.
    if not music_path:
        print("Failed to generate music")
        return None

    print(f"Generated music saved to: {music_path}")
    # Now you can use this in your podcast generation,
    # e.g. mix it with your podcast audio.
    return music_path
129
+
130
+
131
# Different music styles you can generate: each entry maps a podcast
# segment type to the tag string passed to the generator.
MUSIC_STYLES = dict(
    podcast_intro="upbeat, electronic, professional, 120 bpm, energetic, modern",
    podcast_outro="calm, ambient, soft, 80 bpm, relaxing, fade out",
    news_background="minimal, electronic, subtle, 90 bpm, serious, professional",
    commercial="pop, upbeat, catchy, 128 bpm, happy, commercial",
    dramatic="orchestral, dramatic, cinematic, 100 bpm, intense, emotional",
    tech="electronic, futuristic, synth, 110 bpm, innovative, modern",
    chill="lofi, relaxed, warm, 75 bpm, cozy, background",
)
141
+
142
+
143
def generate_podcast_music(style="podcast_intro", duration=15):
    """Generate music for different podcast segments.

    Args:
        style: One of the keys in MUSIC_STYLES; unknown styles fall back
            to "podcast_intro".
        duration: Duration in seconds.

    Returns:
        Path to the generated audio, or None on failure.
    """
    fallback = MUSIC_STYLES["podcast_intro"]
    segment_tags = MUSIC_STYLES.get(style, fallback)
    return generate_music_from_space(
        duration=duration,
        tags=segment_tags,
        lyrics="[instrumental]",
    )
161
+
162
+
163
if __name__ == "__main__":
    # Smoke-test the API with a short intro clip.
    print("Generating test music...")
    audio_file = generate_podcast_music(style="podcast_intro", duration=10)
    if not audio_file:
        print("Failed to generate audio")
    else:
        print(f"Success! Audio saved to: {audio_file}")
app.py CHANGED
@@ -3,6 +3,7 @@ from ui.components import create_main_demo_ui
3
  from pipeline_ace_step import ACEStepPipeline
4
  from data_sampler import DataSampler
5
  import os
 
6
 
7
 
8
  parser = argparse.ArgumentParser()
@@ -30,12 +31,83 @@ def main(args):
30
  torch_compile=args.torch_compile
31
  )
32
  data_sampler = DataSampler()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  demo = create_main_demo_ui(
35
  text2music_process_func=model_demo.__call__,
36
  sample_data_func=data_sampler.sample,
37
  load_data_func=data_sampler.load_json,
38
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  demo.queue(default_concurrency_limit=8).launch(
40
  server_name=args.server_name,
41
  server_port=args.port,
 
3
  from pipeline_ace_step import ACEStepPipeline
4
  from data_sampler import DataSampler
5
  import os
6
+ import gradio as gr
7
 
8
 
9
  parser = argparse.ArgumentParser()
 
31
  torch_compile=args.torch_compile
32
  )
33
  data_sampler = DataSampler()
34
+
35
# Create API function for external calls
def generate_music_api(
    duration: float = 20.0,
    tags: str = "edm, synth, bass, kick drum, 128 bpm, euphoric, pulsating, energetic, instrumental",
    lyrics: str = "[instrumental]",
    infer_steps: int = 60,
    guidance_scale: float = 15.0,
):
    """API function to generate music.

    Args:
        duration: Duration in seconds (default 20).
        tags: Music tags/style description.
        lyrics: Lyrics or [instrumental] for no vocals.
        infer_steps: Inference steps (default 60).
        guidance_scale: Guidance scale (default 15.0).

    Returns:
        audio_path: Path to the generated audio file, or None if the
        pipeline produced no output.
    """
    # All non-exposed pipeline knobs are pinned to the defaults used by
    # the interactive UI so API results match the UI's.
    pipeline_kwargs = dict(
        audio_duration=duration,
        prompt=tags,
        lyrics=lyrics,
        infer_step=infer_steps,
        guidance_scale=guidance_scale,
        scheduler_type="euler",
        cfg_type="apg",
        omega_scale=10.0,
        manual_seeds=None,
        guidance_interval=0.5,
        guidance_interval_decay=0.0,
        min_guidance_scale=3.0,
        use_erg_tag=True,
        use_erg_lyric=False,
        use_erg_diffusion=True,
        oss_steps=None,
        guidance_scale_text=0.0,
        guidance_scale_lyric=0.0,
        audio2audio_enable=False,
        ref_audio_strength=0.5,
        ref_audio_input=None,
        lora_name_or_path="none",
    )
    outputs = model_demo(**pipeline_kwargs)
    # The pipeline returns a sequence of outputs; the first entry is the
    # audio file path — presumably the only one API callers need.
    if outputs and len(outputs) > 0:
        return outputs[0]
    return None
84
 
85
  demo = create_main_demo_ui(
86
  text2music_process_func=model_demo.__call__,
87
  sample_data_func=data_sampler.sample,
88
  load_data_func=data_sampler.load_json,
89
  )
90
+
91
+ # Add API endpoint to the demo
92
+ demo.api_open = True
93
+ demo.api_name = "/generate_music"
94
+
95
+ # Make the API function available
96
+ with demo:
97
+ gr.Interface(
98
+ fn=generate_music_api,
99
+ inputs=[
100
+ gr.Number(value=20, label="Duration (seconds)"),
101
+ gr.Textbox(value="edm, synth, bass, 128 bpm, energetic", label="Tags"),
102
+ gr.Textbox(value="[instrumental]", label="Lyrics"),
103
+ gr.Number(value=60, label="Inference Steps"),
104
+ gr.Number(value=15.0, label="Guidance Scale"),
105
+ ],
106
+ outputs=gr.Audio(type="filepath", label="Generated Music"),
107
+ api_name="generate",
108
+ visible=False # Hide this interface, it's only for API
109
+ )
110
+
111
  demo.queue(default_concurrency_limit=8).launch(
112
  server_name=args.server_name,
113
  server_port=args.port,
pipeline_ace_step.py CHANGED
@@ -36,7 +36,7 @@ from apg_guidance import (
36
  cfg_double_condition_forward,
37
  )
38
  import torchaudio
39
- import torio
40
 
41
 
42
  torch.backends.cudnn.benchmark = False
@@ -1428,12 +1428,13 @@ class ACEStepPipeline:
1428
  f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
1429
  )
1430
  target_wav = target_wav.float()
 
1431
  torchaudio.save(
1432
  output_path_flac,
1433
  target_wav,
1434
  sample_rate=sample_rate,
1435
  format=format,
1436
- compression=torio.io.CodecConfig(bit_rate=320000),
1437
  )
1438
  return output_path_flac
1439
 
 
36
  cfg_double_condition_forward,
37
  )
38
  import torchaudio
39
+ # import torio # Deprecated, removed to fix warning
40
 
41
 
42
  torch.backends.cudnn.benchmark = False
 
1428
  f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
1429
  )
1430
  target_wav = target_wav.float()
1431
+ # Use simple torchaudio.save without deprecated compression parameter
1432
  torchaudio.save(
1433
  output_path_flac,
1434
  target_wav,
1435
  sample_rate=sample_rate,
1436
  format=format,
1437
+ # compression parameter deprecated, format handles encoding
1438
  )
1439
  return output_path_flac
1440
 
vibevoice_integration.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Integration code for VibeVoice-PodcastCreator to use ACE-Music-Generator
3
+
4
+ Add this to your VibeVoice space to generate background music on demand.
5
+ """
6
+
7
+ from gradio_client import Client
8
+ import numpy as np
9
+ from scipy.io import wavfile
10
+ import tempfile
11
+ import os
12
+
13
+
14
class MusicGenerator:
    """Music generator client for VibeVoice podcast creation.

    Connects to an ACE-Music-Generator Hugging Face space and exposes
    helpers for intro/outro/background clips plus a simple mixer that
    lays music under podcast audio.
    """

    def __init__(self, space_name="ACloudCenter/ACE-Music-Generator"):
        """Initialize connection to the music generator space.

        Sets self.connected; all generate_* helpers return None when the
        connection could not be established.
        """
        try:
            self.client = Client(space_name)
            self.connected = True
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            print("Warning: Could not connect to music generator. Music features disabled.")
            self.connected = False

    def generate_intro_music(self, duration=10):
        """Generate intro music for a podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast intro, upbeat, electronic, professional, 120 bpm, energetic",
            lyrics="[instrumental]"
        )

    def generate_outro_music(self, duration=10):
        """Generate outro music for a podcast; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="podcast outro, calm, ambient, soft, 80 bpm, fade out, peaceful",
            lyrics="[instrumental]"
        )

    def generate_background_music(self, style="ambient", duration=30):
        """Generate background music for podcast segments.

        Styles:
            - ambient: Soft background music
            - news: Professional news-style background
            - dramatic: Intense, dramatic music
            - tech: Futuristic tech music
            - chill: Relaxed lofi music

        Unknown styles fall back to "ambient".
        """
        styles = {
            "ambient": "ambient, soft, background, minimal, 70 bpm, atmospheric",
            "news": "news, professional, subtle, electronic, 90 bpm, serious",
            "dramatic": "dramatic, orchestral, cinematic, 100 bpm, intense",
            "tech": "electronic, futuristic, synth, 110 bpm, innovative",
            "chill": "lofi, relaxed, warm, 75 bpm, cozy, mellow"
        }

        if not self.connected:
            return None

        tags = styles.get(style, styles["ambient"])
        return self._generate(duration=duration, tags=tags, lyrics="[instrumental]")

    def generate_commercial_jingle(self, duration=5):
        """Generate a short commercial jingle; None if not connected."""
        if not self.connected:
            return None

        return self._generate(
            duration=duration,
            tags="jingle, commercial, catchy, upbeat, 140 bpm, happy, memorable",
            lyrics="[instrumental]"
        )

    def _generate(self, duration, tags, lyrics):
        """Internal helper: call the space's /generate API endpoint."""
        try:
            result = self.client.predict(
                duration,
                tags,
                lyrics,
                60,    # infer_steps
                15.0,  # guidance_scale
                api_name="/generate"
            )
            return result
        except Exception as e:
            print(f"Error generating music: {e}")
            return None

    def mix_with_podcast(self, podcast_audio_path, music_path, music_volume=0.2):
        """
        Mix background music with podcast audio.

        Args:
            podcast_audio_path: Path to podcast audio file (WAV; assumed
                int16 samples — TODO confirm against the podcast pipeline)
            music_path: Path to music file (WAV)
            music_volume: Volume of music (0-1, lower = quieter background)

        Returns:
            mixed_audio_path: Path to mixed audio file, or the original
            podcast path if mixing fails.
        """
        try:
            # Load audio files
            podcast_rate, podcast_data = wavfile.read(podcast_audio_path)
            music_rate, music_data = wavfile.read(music_path)

            # Ensure same sample rate.
            if podcast_rate != music_rate:
                # Simple linear resampling (use librosa/scipy.signal for
                # better quality).
                music_data = np.interp(
                    np.linspace(0, len(music_data), int(len(music_data) * podcast_rate / music_rate)),
                    np.arange(len(music_data)),
                    music_data
                )

            # Match lengths: loop music if shorter, and ALWAYS trim to the
            # podcast length. (Bug fix: the original only trimmed after
            # looping, so music longer than the podcast broke the
            # element-wise mix below.)
            if len(music_data) < len(podcast_data):
                music_data = np.tile(music_data, (len(podcast_data) // len(music_data) + 1))
            music_data = music_data[:len(podcast_data)]

            # Mix audio; the float music track upcasts the sum to float.
            mixed = podcast_data + (music_data * music_volume)

            # Normalize to the int16 range to prevent clipping.
            mixed = np.clip(mixed, -32768, 32767).astype(np.int16)

            # NamedTemporaryFile replaces the insecure, deprecated
            # tempfile.mktemp used before.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                output_path = tmp.name
            wavfile.write(output_path, podcast_rate, mixed)

            return output_path

        except Exception as e:
            print(f"Error mixing audio: {e}")
            return podcast_audio_path  # Return original if mixing fails
149
+
150
+
151
# Example usage in VibeVoice generator
def enhance_podcast_with_music(podcast_generator):
    """Example of wiring music generation into an existing podcast generator.

    Returns a dict with "intro", "background" and "outro" clip paths
    (each may be None when generation failed).
    """
    generator = MusicGenerator()

    # Your existing podcast generation code
    # podcast_audio = podcast_generator.generate_podcast(...)

    # Values are evaluated in order: intro, background, outro.
    clips = {
        "intro": generator.generate_intro_music(duration=5),
        "background": generator.generate_background_music(
            style="ambient",
            duration=60,  # Adjust based on your podcast length
        ),
        "outro": generator.generate_outro_music(duration=5),
    }

    # Mix background music with podcast (optional)
    # if clips["background"] and podcast_audio:
    #     mixed_audio = generator.mix_with_podcast(
    #         podcast_audio,
    #         clips["background"],
    #         music_volume=0.1  # Keep it quiet in background
    #     )

    return clips
188
+
189
+
190
# Quick function to add to your VibeVoice app.py
def add_music_generation_to_vibevoice():
    """Add this to your VibeVoice app.py to integrate music generation.

    This function only documents the integration; it performs no work
    and returns None.
    """
    # In your create_demo() function, add:
    example = """
    # Add music generator
    music_gen = MusicGenerator()

    # Add checkbox for music generation
    with gr.Row():
        add_intro_music = gr.Checkbox(label="Add Intro Music", value=False)
        add_outro_music = gr.Checkbox(label="Add Outro Music", value=False)
        add_background_music = gr.Checkbox(label="Add Background Music", value=False)
        background_style = gr.Dropdown(
            choices=["ambient", "news", "dramatic", "tech", "chill"],
            value="ambient",
            label="Background Music Style"
        )

    # In your generation function:
    def generate_with_music(..., add_intro, add_outro, add_background, bg_style):
        # Your existing generation code
        podcast_audio = generate_podcast(...)

        # Add music if requested
        if add_intro:
            intro = music_gen.generate_intro_music(5)
            # Concatenate intro with podcast

        if add_background:
            bg_music = music_gen.generate_background_music(bg_style, duration=60)
            # Mix with podcast audio

        if add_outro:
            outro = music_gen.generate_outro_music(5)
            # Concatenate outro

        return final_audio
    """
    return None
233
+
234
+
235
if __name__ == "__main__":
    # Smoke-test the music generator end to end.
    print("Testing music generator...")
    music_gen = MusicGenerator()

    print("Generating intro music...")
    if (intro := music_gen.generate_intro_music(duration=5)):
        print(f"Intro music saved to: {intro}")

    print("Generating background music...")
    if (background := music_gen.generate_background_music(style="ambient", duration=10)):
        print(f"Background music saved to: {background}")

    print("Done!")