Spaces:

Guunk
/

Ttsfon

Running

App Files Files Community

Guunk committed on Jul 25

Commit

c8b5506

verified ·

1 Parent(s): f79ba01

Create app.py

Browse files

Files changed (1) hide show

app.py +70 -0

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+from transformers import AutoProcessor, VitsModel
+import torch
+import scipy.io.wavfile
+import tempfile
+# Load the Fon TTS model from Meta AI
+model = VitsModel.from_pretrained("facebook/mms-tts-fon")
+processor = AutoProcessor.from_pretrained("facebook/mms-tts-fon")
+# Automatically get sampling rate from model config
+sampling_rate = model.config.sampling_rate
+# Function to synthesize Fon audio from text
+def tts_fon(text):
+    inputs = processor(text, return_tensors="pt")
+    with torch.no_grad():
+        audio = model(**inputs).waveform[0].numpy()
+    # Save temporary WAV file using the model's sampling rate
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        scipy.io.wavfile.write(f.name, rate=sampling_rate, data=audio)
+        return f.name
+# Page heading and Markdown-formatted description passed to gr.Interface.
+# Both are user-facing runtime strings, so wording changes alter what the app displays.
+title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"
+description = """
+This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
+The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.
+Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.
+---
+### 🔧 How to Use:
+1. Type a sentence in **Fon** (Latin script, tone markers optional).
+2. Press **Submit** or hit **Enter**.
+3. Wait a few seconds for audio synthesis.
+4. Listen or download the audio.
+---
+### 📜 Rules & Notes:
+1. Input should be in **Fon** only (avoid English or other languages).
+2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
+3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
+4. Tone marks like `à, é, ǒ, ê` are supported but optional.
+5. Output uses a single female voice (pretrained by Meta).
+6. Audio is generated at the model’s default sampling rate (may vary by version).
+7. Model is intended for **research and demonstration** only.
+8. Do **not** use for commercial purposes without permission.
+9. Underlying model licensed under **CC-BY-NC 4.0**.
+10. Please be respectful — offensive or inappropriate input is not allowed.
+---
+✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
+"""
+# Gradio interface
+iface = gr.Interface(
+    fn=tts_fon,
+    inputs=gr.Textbox(label="Enter Fon text here", placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.", lines=3),
+    outputs=gr.Audio(label="Synthesized Fon Speech", type="filepath"),
+    title=title,
+    description=description,
+    theme="default"
+)
+iface.launch()