Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import AutoProcessor, VitsModel
|
3 |
+
import torch
|
4 |
+
import scipy.io.wavfile
|
5 |
+
import tempfile
|
6 |
+
|
7 |
+
# Hugging Face Hub id of the Meta AI MMS text-to-speech checkpoint for Fon.
# Kept in one place so the model and its text processor cannot drift apart.
MODEL_ID = "facebook/mms-tts-fon"

# Load the model and its matching text processor once, at import time,
# so every synthesis request reuses the same objects.
model = VitsModel.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Read the output sampling rate from the model config instead of hard-coding it.
sampling_rate = model.config.sampling_rate
|
13 |
+
|
14 |
+
def tts_fon(text):
    """Synthesize Fon speech for *text* and return the path of a WAV file.

    Args:
        text: Fon text to speak.

    Returns:
        Path of a newly created temporary ``.wav`` file written at the
        module-level ``sampling_rate``, or ``None`` when *text* is empty,
        whitespace-only or ``None`` (there is nothing to synthesize).
    """
    # Guard clause: do not run the model on blank input.
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # Reserve a unique path, then close our own handle before SciPy reopens
    # the path by name, so only one handle ever writes the file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)

    # NOTE: delete=False means the file outlives this call; nothing in this
    # function removes it afterwards.
    return wav_path
|
24 |
+
|
25 |
+
# Title and detailed description shown at the top of the demo page.
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"

# Markdown rendered under the title. Step 2 says Shift+Enter because the
# input is a multi-line textbox, where plain Enter inserts a new line.
description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Shift+Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""
|
59 |
+
|
60 |
+
# Gradio interface: one multi-line text input mapped to one audio output.
iface = gr.Interface(
    fn=tts_fon,
    inputs=gr.Textbox(
        label="Enter Fon text here",
        placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
        lines=3,
    ),
    # tts_fon returns a path to a WAV file on disk.
    outputs=gr.Audio(label="Synthesized Fon Speech", type="filepath"),
    title=title,
    description=description,
    theme="default",
)

# Start the server only when executed as a script (e.g. `python app.py`),
# so importing this module does not launch a web server as a side effect.
if __name__ == "__main__":
    iface.launch()
|