|
import os |
|
import numpy as np |
|
import gradio as gr |
|
import assemblyai as aai |
|
from translate import Translator |
|
import uuid |
|
from elevenlabs import VoiceSettings |
|
from elevenlabs.client import ElevenLabs |
|
from pathlib import Path |
|
from scipy.io.wavfile import write, read |
|
import yt_dlp |
|
|
|
# API credentials are read from the process environment; each value is None
# when the corresponding variable is not set.
ELEVENLABS_API = os.getenv("ELEVENLABS_API")
ASSEMBLYAI_API = os.getenv("ASSEMBLYAI_API")
|
|
|
def voice_to_voice(audio_file):
    """Transcribe an audio file, translate the text and voice each translation.

    Args:
        audio_file: Path of the audio file to process, as handed over by the
            Gradio audio input.

    Returns:
        A tuple holding the generated audio file paths (``Path`` objects)
        followed by the translated texts, both in the order produced by
        ``translate_text``.

    Raises:
        gr.Error: If no audio was supplied, if the transcription reports an
            error, or if the transcript contains no text.
    """
    # Fail early with a readable message instead of handing an empty input
    # to the transcription service.
    if not audio_file:
        raise gr.Error("Please record or upload an audio file first.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == 'error':
        raise gr.Error(transcript.error)

    text = transcript.text
    # With no transcribed text there is nothing to translate or synthesize.
    if not text:
        raise gr.Error("No speech could be transcribed from the audio.")

    list_translations = translate_text(text)
    generated_audio_paths = [
        Path(text_to_speech(translation)) for translation in list_translations
    ]

    # Output contract: every audio path first, then every translated text.
    return tuple(generated_audio_paths + list_translations)
|
|
|
def transcribe_audio(audio_file):
    """Transcribe ``audio_file`` with AssemblyAI and return the transcript.

    The key held in ``ASSEMBLYAI_API`` is written to the SDK settings before
    the transcription request is made.
    """
    aai.settings.api_key = ASSEMBLYAI_API
    return aai.Transcriber().transcribe(audio_file)
|
|
|
def translate_text(text):
    """Translate English ``text`` into every supported target language.

    Returns a list with one translation per language code, in the order
    ru, tr, sv, de, es, ja, id.
    """
    target_codes = ("ru", "tr", "sv", "de", "es", "ja", "id")
    # A separate translator is created for each target language.
    return [
        Translator(from_lang="en", to_lang=code).translate(text)
        for code in target_codes
    ]
|
|
|
def text_to_speech(text):
    """Synthesize ``text`` with ElevenLabs and save the audio as an MP3 file.

    Returns the path of the written file: a UUID-based file name relative to
    the current working directory.
    """
    client = ElevenLabs(api_key=ELEVENLABS_API)
    settings = VoiceSettings(
        stability=0.5,
        similarity_boost=0.8,
        style=0.5,
        use_speaker_boost=True,
    )
    audio_stream = client.text_to_speech.convert(
        voice_id="<your-voice-id>",
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=settings,
    )

    save_file_path = f"{uuid.uuid4()}.mp3"
    with open(save_file_path, "wb") as out:
        # Empty chunks are skipped; everything else is written in order.
        out.writelines(chunk for chunk in audio_stream if chunk)

    return save_file_path
|
|
|
def download_audio(url):
    """Download the audio behind ``url`` with yt-dlp and convert it to WAV.

    Args:
        url: Link to the media to download.

    Returns:
        Path (``str``) of the extracted ``.wav`` file, located in the
        ``ytdl`` directory.

    Raises:
        gr.Error: If ``url`` is empty or the converted file cannot be found.
    """
    if not url or not url.strip():
        raise gr.Error("Please paste a link before downloading.")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        # Only one file is returned, so fetch just the linked item when the
        # URL also references a playlist.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url.strip(), download=True)
        # The audio-extraction post-processor is configured for wav, so the
        # final file is the prepared name with its extension replaced.
        downloaded_name = ydl.prepare_filename(info_dict)
        file_path = os.path.splitext(downloaded_name)[0] + '.wav'

    # A cheap existence check replaces decoding the whole file into memory,
    # whose result was never used.
    if not os.path.isfile(file_path):
        raise gr.Error(f"Downloaded audio file not found: {file_path}")

    return file_path
|
|
|
# Gradio user interface: an audio input (recorded, uploaded or downloaded via
# link), a submit button, and one audio player plus text area per language.
with gr.Blocks() as demo:
    gr.Markdown("## Audio Translator")
    gr.Markdown(
        """
        The API Key you need:
        [AssemblyAI API key](https://www.assemblyai.com/?utm_source=youtube&utm_medium=referral&utm_campaign=yt_mis_66)<br>
        [Elevenlabs API key](https://elevenlabs.io/)<br>
        Note: you need at least 30 minutes of a voice recording of yourself for the *Professional voice cloning. But there is also a simpler voice cloning option that only requires 30 seconds of voice recording. *Professional voice cloning is a paid feature.
        """
    )
    audio_input = gr.Audio(type="filepath", show_download_button=True)
    with gr.Accordion("Inputs by Link", open=False):
        with gr.Row():
            link = gr.Textbox(
                label="Link",
                placeholder="Paste the link here",
                interactive=True
            )
            download_button = gr.Button(
                "Download!",
                variant="primary"
            )
            # The downloaded file path is loaded into the audio input.
            download_button.click(download_audio, [link], [audio_input])
    submit = gr.Button("Submit", variant="primary")
    clear_button = gr.ClearButton(audio_input, "Clear")

    # Labels are listed in the same order as the language codes used by
    # translate_text ("ru", "tr", "sv", "de", "es", "ja", "id") so that each
    # player is labeled with the language it actually plays.
    languages = [
        "Russian", "Turkish", "Swedish", "German",
        "Spanish", "Japanese", "Indonesian",
    ]

    audio_outputs = []
    text_outputs = []
    for lang in languages:
        with gr.Group():
            audio_outputs.append(gr.Audio(label=lang, interactive=False))
            text_outputs.append(gr.Markdown())

    # voice_to_voice returns every audio path first and every translation
    # afterwards, so the outputs list must follow that layout rather than
    # the interleaved order in which the components are created.
    output_components = audio_outputs + text_outputs

    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)


demo.launch(server_port=7860)
|
|