# Minecraft Space - app.py
import streamlit as st
from transformers import pipeline, AutoProcessor, AutoModel
from scipy.io.wavfile import write as write_wav
import numpy as np
import torch
def img2text(image_path):
    """Generate a caption for the uploaded image with the BLIP captioning model."""
    img2caption = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    return img2caption(image_path)[0]['generated_text']
def text2story(text):
    """Expand the image caption into a short story (up to 100 tokens) with a text-generation model."""
    pipe = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
    story_text = pipe(text, max_length=100)[0]['generated_text']
    return story_text
def text2audio(story_text):
    """Synthesize speech for the story text with Facebook's MMS-TTS English model."""
    processor = AutoProcessor.from_pretrained("facebook/mms-tts-eng")
    model = AutoModel.from_pretrained("facebook/mms-tts-eng")
    inputs = processor(text=story_text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs).waveform
    audio_array = output.cpu().numpy().squeeze()
    sample_rate = 16000  # MMS-TTS generates 16 kHz audio
    return audio_array, sample_rate
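
# Optional: a minimal caching sketch. Each helper above reloads its model on
# every Streamlit rerun; wrapping the loads with st.cache_resource (available
# since Streamlit 1.18) keeps them in memory across reruns. The load_captioner
# helper below is illustrative only and is not wired into the functions above.
@st.cache_resource
def load_captioner():
    # Cached loader for the BLIP captioning pipeline
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")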
# Streamlit UI
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")
if uploaded_file is not None:
    # Save the upload to disk so the captioning pipeline can read it by path
    bytes_data = uploaded_file.getvalue()
    with open(uploaded_file.name, "wb") as file:
        file.write(bytes_data)
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    scenario = img2text(uploaded_file.name)
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio
    st.text('Generating audio...')
    audio_array, sample_rate = text2audio(story)

    # Write the waveform to a WAV file and play it back in the app
    audio_file = "output_audio.wav"
    write_wav(audio_file, sample_rate, audio_array)
    st.audio(audio_file)
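
# To try the app locally (assuming this file is saved as app.py and that
# streamlit, transformers, torch, and scipy are installed):
#   streamlit run app.py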