Spaces:
Sleeping
Sleeping
import html
from urllib.parse import quote

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download
# Fetch metadata.csv from the dataset repository on the Hugging Face Hub and
# load it into a DataFrame that the rest of the module reads.
repo_id = "jacksonwambali/swahili-tts-dataset"  # Your actual repo_id
metadata_path = hf_hub_download(
    repo_type="dataset",
    repo_id=repo_id,
    filename="metadata.csv",
)
df = pd.read_csv(metadata_path)
# Construct direct URLs to audio files on Hugging Face
def get_audio_url(file_name):
    """Return the direct URL of one clip inside the dataset repository.

    Args:
        file_name: Clip name without the ``.wav`` extension (the extension is
            appended here). Non-string values are converted with ``str``.

    Returns:
        The ``resolve/main`` URL of ``wavs/<file_name>.wav`` in the dataset
        named by the module-level ``repo_id``.
    """
    # Percent-encode the name so characters such as spaces, "#", "?" or "%"
    # cannot truncate or corrupt the URL; "/" is left intact by quote().
    encoded_name = quote(str(file_name))
    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/wavs/{encoded_name}.wav"
# Derive one playable URL per metadata row from its file name.
df["audio_url"] = df["file_name"].map(get_audio_url)
# Display function with custom styling

# Static opening of the page: stylesheet, title and the table header row.
# NOTE(review): the background-image URL looks like a placeholder - confirm.
_PAGE_HEAD = """
<style>
    body, html {
        margin: 0;
        padding: 0;
        height: 100%;
        font-family: Arial, sans-serif;
        overflow: auto; /* Ensure the body can scroll */
    }
    .container {
        background-image: url('https://i.imgur.com/your-vintage-audio-player-image.jpg');
        background-size: cover;
        background-position: center;
        padding: 20px;
        color: #333;
        min-height: 100vh;
        display: flex;
        flex-direction: column;
        align-items: center;
    }
    h1 {
        margin-bottom: 20px;
        font-size: 2em;
        text-align: center;
    }
    .table-wrapper {
        max-height: 500px; /* Set a maximum height for the table */
        overflow-y: auto; /* Enable vertical scrolling */
        width: 80%;
        border-radius: 10px;
    }
    table {
        width: 100%;
        border-collapse: collapse;
        background: rgba(255, 255, 255, 0.8);
        border-radius: 10px;
        overflow: hidden;
    }
    th, td {
        padding: 10px;
        text-align: left;
        border-bottom: 1px solid #ddd;
    }
    th {
        background: #f4f4f4;
        position: sticky; /* Keep headers fixed while scrolling */
        top: 0;
        z-index: 1;
    }
    audio {
        width: 200px;
    }
</style>
<div class="container">
    <h1>Swahili TTS Dataset</h1>
    <div class="table-wrapper">
        <table>
            <tr>
                <th>Audio</th>
                <th>Transcript</th>
                <th>Duration (s)</th>
            </tr>
"""

# Static closing of the page: ends the table and both wrapper divs.
_PAGE_TAIL = """
        </table>
    </div>
</div>
"""


def _render_row(audio_url, transcript, duration):
    """Return the ``<tr>`` markup for one clip, with its text HTML-escaped."""
    # The URL lands inside a double-quoted attribute, so quotes must be
    # escaped too; the transcript is a text node and only needs &, < and >.
    safe_url = html.escape(str(audio_url), quote=True)
    safe_transcript = html.escape(str(transcript), quote=False)
    return f"""
            <tr>
                <td><audio controls><source src="{safe_url}" type="audio/wav"></audio></td>
                <td>{safe_transcript}</td>
                <td>{duration:.2f}</td>
            </tr>
"""


def display_dataset():
    """Render the whole dataset as a styled HTML table with audio players.

    Reads the ``audio_url``, ``transcript`` and ``duration`` columns of the
    module-level ``df`` and emits one table row per record. Transcript and
    URL are HTML-escaped so metadata containing ``<``, ``&`` or quotes cannot
    break or inject markup.

    Returns:
        A single HTML string: stylesheet, heading and the table.
    """
    # Walk the three columns in lockstep instead of iterrows(), which builds
    # a Series per row, and join once instead of growing a string with +=.
    rows = [
        _render_row(audio_url, transcript, duration)
        for audio_url, transcript, duration in zip(
            df["audio_url"], df["transcript"], df["duration"]
        )
    ]
    return "".join([_PAGE_HEAD, *rows, _PAGE_TAIL])
# Build the Gradio interface around display_dataset (no inputs, HTML output)
# and start serving it.
demo = gr.Interface(
    fn=display_dataset,
    inputs=None,
    outputs="html",
    title="Swahili TTS Dataset Viewer",
    description="Play Swahili audio clips and view their transcripts.",
)
demo.launch()