KeviniveK committed on
Commit
9e49187
·
verified ·
1 Parent(s): 33faaea

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +48 -22
  2. requirements.txt +5 -2
app.py CHANGED
@@ -1,30 +1,56 @@
1
  import streamlit as st
2
- from transformers import pipeline
 
 
 
3
 
4
- # Load the text classification model pipeline
5
- classifier = pipeline("text-classification",model='isom5240ust/bert-base-uncased-emotion', return_all_scores=True)
 
6
 
7
- # Streamlit application title
8
- st.title("Text Classification for you")
9
- st.write("Classification for 6 emotions: sadness, joy, love, anger, fear, surprise")
 
10
 
11
- # Text input for user to enter the text to classify
12
- text = st.text_area("Enter the text to classify", "")
 
13
 
14
- # Perform text classification when the user clicks the "Classify" button
15
- if st.button("Classify"):
16
- # Perform text classification on the input text
17
- results = classifier(text)[0]
18
 
19
- # Display the classification result
20
- max_score = float('-inf')
21
- max_label = ''
22
 
23
- for result in results:
24
- if result['score'] > max_score:
25
- max_score = result['score']
26
- max_label = result['label']
27
 
28
- st.write("Text:", text)
29
- st.write("Label:", max_label)
30
- st.write("Score:", max_score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoProcessor, AutoModel
3
+ from scipy.io.wavfile import write as write_wav
4
+ import numpy as np
5
+ import torch
6
 
7
# Lazily-created caption pipeline, kept at module level so the BLIP weights
# are loaded once per process instead of on every call.
_CAPTION_PIPE = None

def img2text(image_path):
    """Generate a one-sentence English caption for the image at *image_path*.

    Args:
        image_path: Filesystem path (or URL) accepted by the HF image-to-text
            pipeline.

    Returns:
        str: The generated caption text.
    """
    global _CAPTION_PIPE
    if _CAPTION_PIPE is None:
        # First call pays the model-load cost; later calls reuse the pipeline.
        _CAPTION_PIPE = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
    return _CAPTION_PIPE(image_path)[0]['generated_text']
10
 
11
# Lazily-created story-generation pipeline; loading the model each call was
# the dominant cost of this function, so cache it at module level.
_STORY_PIPE = None

def text2story(text):
    """Expand a short prompt into a story of at most ~100 tokens.

    Args:
        text: Seed/prompt text (e.g. an image caption).

    Returns:
        str: The generated story, which includes the prompt prefix as
        returned by the text-generation pipeline.
    """
    global _STORY_PIPE
    if _STORY_PIPE is None:
        _STORY_PIPE = pipeline("text-generation", model="pranavpsv/genre-story-generator-v2")
    story_text = _STORY_PIPE(text, max_length=100)[0]['generated_text']
    return story_text
15
 
16
# Cached TTS processor/model pair — from_pretrained() downloads/loads weights,
# so doing it on every call made each synthesis pay the full load cost.
_TTS_PROCESSOR = None
_TTS_MODEL = None

def text2audio(story_text):
    """Synthesize English speech for *story_text* with facebook/mms-tts-eng.

    Args:
        story_text: The text to speak.

    Returns:
        tuple[numpy.ndarray, int]: (mono waveform as a 1-D float array,
        sample rate in Hz).
    """
    global _TTS_PROCESSOR, _TTS_MODEL
    if _TTS_MODEL is None:
        _TTS_PROCESSOR = AutoProcessor.from_pretrained("facebook/mms-tts-eng")
        _TTS_MODEL = AutoModel.from_pretrained("facebook/mms-tts-eng")

    inputs = _TTS_PROCESSOR(text=story_text, return_tensors="pt")
    # Inference only — disable autograd to avoid building a graph.
    with torch.no_grad():
        waveform = _TTS_MODEL(**inputs).waveform

    # Drop the batch dimension and move to CPU for scipy's WAV writer.
    audio_array = waveform.cpu().numpy().squeeze()
    # NOTE(review): 16 kHz is assumed for MMS-TTS English — confirm against
    # the model config's sampling_rate rather than hard-coding.
    sample_rate = 16000

    return audio_array, sample_rate
 
 
 
28
 
29
# ---------------- Streamlit UI ----------------
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")

uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # Persist the upload to disk so the captioning stage can read it by path.
    with open(uploaded_file.name, "wb") as image_file:
        image_file.write(uploaded_file.getvalue())
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    scenario = img2text(uploaded_file.name)
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio
    st.text('Generating audio...')
    audio_array, sample_rate = text2audio(story)

    # Write the waveform to a WAV file and hand the path to the audio widget.
    wav_path = "output_audio.wav"
    write_wav(wav_path, sample_rate, audio_array)
    st.audio(wav_path)
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- spaces
2
  transformers
3
- torch
 
 
 
 
1
+ streamlit
2
  transformers
3
+ torch
4
+ scipy
5
+ soundfile
6
+ numpy