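# "Spaces: Sleeping" -- page-banner text captured together with this file (presumably from the hosting page); not part of the program.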
import csv
import io
import json
import time
from typing import Optional

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Page configuration: browser-tab title/icon, wide layout, sidebar open on load.
# NOTE(review): the page_icon value looks like a mis-decoded emoji; it is kept
# unchanged here -- confirm the intended character against the original file.
st.set_page_config(
    page_title="Masoretic Hebrew to Targumic Aramaic Translation",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Custom CSS for modern styling. The stylesheet is injected as raw HTML, which
# is why unsafe_allow_html=True is required. The class names defined here
# (.main-header, .sub-header, .input-area, .output-area, ...) are the ones the
# page markup emitted elsewhere in this file refers to.
st.markdown("""
<style>
    .main-header {
        font-size: 3rem;
        font-weight: 700;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        text-align: center;
        margin-bottom: 2rem;
    }
    .sub-header {
        font-size: 1.2rem;
        color: #666;
        text-align: center;
        margin-bottom: 3rem;
    }
    .translation-box {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        padding: 2rem;
        border-radius: 15px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        margin: 1rem 0;
    }
    .input-area {
        background: white;
        border-radius: 10px;
        padding: 1.5rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.05);
    }
    .output-area {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        padding: 1.5rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.1);
    }
    .direction-selector {
        background: white;
        border-radius: 10px;
        padding: 1rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.05);
        margin-bottom: 1rem;
    }
    .stButton > button {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        border-radius: 25px;
        padding: 0.75rem 2rem;
        font-weight: 600;
        transition: all 0.3s ease;
    }
    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4);
    }
    .model-info {
        background: #f8f9fa;
        border-radius: 10px;
        padding: 1rem;
        margin: 1rem 0;
        border-left: 4px solid #667eea;
    }
</style>
""", unsafe_allow_html=True)
@st.cache_resource(show_spinner=False)
def _load_model_cached(model_name: str):
    """Load tokenizer and model for *model_name* and memoize the result.

    Streamlit re-executes the whole script on every interaction; the
    ``st.cache_resource`` decorator keeps the loaded objects alive between
    those reruns so the checkpoint is only loaded once. Errors are allowed to
    propagate: an exception is not stored in the cache, so a failed load is
    retried on the next run instead of being remembered.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # Move to GPU if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    # Inference only: switch off training-mode behaviour.
    model.eval()
    return tokenizer, model, device


def load_model():
    """Load the Hugging Face model and tokenizer with caching.

    Returns:
        A ``(tokenizer, model, device)`` tuple, where ``device`` is ``"cuda"``
        or ``"cpu"``. If loading fails, an error is shown in the page and
        ``(None, None, None)`` is returned so the caller can stop gracefully.
    """
    model_name = "johnlockejrr/marianmt-he2arc-targum"
    with st.spinner("Loading translation model..."):
        try:
            return _load_model_cached(model_name)
        except Exception as e:
            # Top-level UI boundary: report the problem instead of crashing.
            st.error(f"Error loading model: {str(e)}")
            return None, None, None
def translate_text(text: str, direction: str, tokenizer, model, device: str, max_length: int = 512) -> Optional[str]:
    """Translate text using the loaded model.

    Args:
        text: Source text. Blank or whitespace-only input is not translated.
        direction: ``"Hebrew to Aramaic"`` selects the ``>>heb<<`` prefix; any
            other value selects ``>>arc<<``.
        tokenizer: Tokenizer returned by ``load_model``.
        model: Seq2seq model returned by ``load_model``.
        device: Device string the encoded inputs are moved to.
        max_length: Used both as the truncation limit for the tokenized input
            and as the maximum length passed to generation.

    Returns:
        The decoded translation, or ``None`` when the input is blank or when
        tokenization/generation raised (the error is shown via ``st.error``).
    """
    if not text.strip():
        return None
    try:
        # Add language prefix based on direction.
        # NOTE(review): multilingual Marian checkpoints usually expect the
        # *target* language token here -- confirm that ">>heb<<" is what this
        # checkpoint was trained with for Hebrew -> Aramaic.
        if direction == "Hebrew to Aramaic":
            input_text = f">>heb<< {text}"
        else:  # Aramaic to Hebrew
            input_text = f">>arc<< {text}"
        # Tokenize input and move the resulting tensors next to the model.
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
            padding=True
        ).to(device)
        # Generate translation; gradients are not needed for inference.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                num_beams=4,
                length_penalty=0.6,
                early_stopping=True,
                do_sample=False
            )
        # Decode output (only one sequence was submitted, so take index 0).
        translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translation
    except Exception as e:
        # UI boundary: surface the failure to the user and signal it with None.
        st.error(f"Translation error: {str(e)}")
        return None
def _reset_inputs() -> None:
    """Button callback: blank the text area and swap in a fresh file uploader."""
    st.session_state["input_text"] = ""
    # An uploader cannot be emptied by assigning to its state; changing its
    # key makes Streamlit build a new, empty widget on the next run.
    st.session_state["uploader_generation"] = (
        st.session_state.get("uploader_generation", 0) + 1
    )


def _results_to_csv(results) -> str:
    """Serialize batch results (dicts with 'original'/'translation') to CSV text."""
    buffer = io.StringIO()
    # Header is written verbatim so the file layout matches earlier exports.
    buffer.write("Masoretic Hebrew,Targumic Aramaic\n")
    # csv.writer doubles embedded double quotes, which hand-built f-strings
    # did not; for fields without quotes the output bytes are unchanged.
    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerows((row["original"], row["translation"]) for row in results)
    return buffer.getvalue()


def _render_batch_section(tokenizer, model, device, max_length) -> None:
    """Render the file-upload batch translator and its CSV download."""
    st.markdown("### π Batch Translation")
    st.markdown("Upload a text file with multiple Masoretic Hebrew lines to translate them all to Targumic Aramaic.")
    uploaded_file = st.file_uploader(
        "Choose a text file",
        type=['txt'],
        help="Upload a .txt file with one Masoretic Hebrew text per line",
        # The key changes whenever "Clear All" is pressed (see _reset_inputs).
        key=f"batch_upload_{st.session_state.get('uploader_generation', 0)}",
    )
    if uploaded_file is None:
        return
    try:
        # getvalue() returns the full payload regardless of the read position;
        # 'utf-8-sig' also accepts plain UTF-8 and drops a leading BOM so it
        # does not end up inside the first line sent to the model.
        content = uploaded_file.getvalue().decode('utf-8-sig')
        lines = [line.strip() for line in content.splitlines() if line.strip()]
        if not lines:
            st.warning("The uploaded file contains no text lines.")
            return
        st.success(f"π Loaded {len(lines)} lines from {uploaded_file.name}")
        # NOTE(review): clicking the download button below triggers a rerun in
        # which this button reads False, so the rendered results disappear;
        # keeping them in st.session_state would avoid that.
        if st.button("π Translate All to Targumic Aramaic", type="primary"):
            st.markdown("### π Batch Translation Results")
            # Create a progress bar
            progress_bar = st.progress(0)
            status_text = st.empty()
            results = []
            total = len(lines)
            for position, line in enumerate(lines, start=1):
                status_text.text(f"Translating line {position}/{total}: {line[:50]}...")
                translation = translate_text(
                    line,
                    "Hebrew to Aramaic",
                    tokenizer,
                    model,
                    device,
                    max_length
                )
                results.append({
                    'original': line,
                    'translation': translation or "Translation failed"
                })
                # Update progress
                progress_bar.progress(position / total)
            status_text.text("β Translation complete!")
            # Display results
            for position, result in enumerate(results, start=1):
                with st.expander(f"Line {position}: {result['original'][:50]}..."):
                    st.markdown(f"**Masoretic Hebrew:** {result['original']}")
                    st.markdown(f"**Targumic Aramaic:** {result['translation']}")
            # Download results
            st.download_button(
                label="π₯ Download Results as CSV",
                data=_results_to_csv(results),
                file_name="aramaic_translations.csv",
                mime="text/csv"
            )
    except Exception as e:
        # UI boundary: report instead of crashing the page.
        st.error(f"Error reading file: {str(e)}")


def main():
    """Build the translator page.

    Renders, in order: the header, a settings/model-info sidebar, a two-column
    single-text translator, a batch translator fed by an uploaded ``.txt``
    file, and a footer. Returns early (after showing an error) when the model
    could not be loaded.

    NOTE(review): several labels start with characters that look like
    mis-decoded emoji; they are reproduced unchanged -- confirm against the
    original file.
    """
    # Header
    st.markdown('<h1 class="main-header">π Masoretic Hebrew-Aramaic Translator</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">Powered by the johnlockejrr/marianmt-he2arc-targum model</p>', unsafe_allow_html=True)
    # Load model
    tokenizer, model, device = load_model()
    if tokenizer is None or model is None:
        st.error("Failed to load the translation model. Please check your internet connection and try again.")
        return
    # Sidebar for settings
    with st.sidebar:
        st.markdown("### βοΈ Settings")
        # Max length setting
        max_length = st.slider(
            "Maximum Output Length",
            min_value=64,
            max_value=512,
            value=256,
            step=32,
            help="Maximum length of the generated translation"
        )
        # Model info
        st.markdown("### π Model Information")
        st.markdown("**Model:** johnlockejrr/marianmt-he2arc-targum")
        st.markdown(f"**Device:** {device.upper()}")
        st.markdown(f"**Tokenizer:** {tokenizer.__class__.__name__}")
        st.markdown(f"**Model Type:** {model.__class__.__name__}")
        st.markdown("**Direction:** Masoretic Hebrew β Targumic Aramaic")
        # Clear button. A bare st.rerun() leaves widget values untouched, so
        # the inputs are reset in a callback, which runs before the rerun.
        st.button("ποΈ Clear All", on_click=_reset_inputs)
    # Main content area
    input_col, output_col = st.columns([1, 1])
    with input_col:
        st.markdown('<div class="input-area">', unsafe_allow_html=True)
        st.markdown("### π Input Text")
        # Text input; the key lets _reset_inputs blank it through session state.
        input_text = st.text_area(
            "Enter Masoretic Hebrew text to translate",
            height=200,
            placeholder="Enter your Masoretic Hebrew text here...",
            help="Type or paste the Masoretic Hebrew text you want to translate to Targumic Aramaic",
            key="input_text",
        )
        # Translate button
        translate_button = st.button(
            "π Translate to Targumic Aramaic",
            type="primary",
            use_container_width=True
        )
        st.markdown('</div>', unsafe_allow_html=True)
    with output_col:
        st.markdown('<div class="output-area">', unsafe_allow_html=True)
        st.markdown("### π― Targumic Aramaic Translation")
        if translate_button and input_text.strip():
            with st.spinner("Translating to Targumic Aramaic..."):
                # Add a small delay for better UX
                time.sleep(0.5)
                translation = translate_text(
                    input_text,
                    "Hebrew to Aramaic",
                    tokenizer,
                    model,
                    device,
                    max_length
                )
            if translation:
                st.markdown("**Targumic Aramaic:**")
                # Display translation in a code block that can be easily copied
                st.code(translation, language=None)
            else:
                st.error("Translation failed. Please try again.")
        else:
            st.markdown("*Targumic Aramaic translation will appear here*")
        st.markdown('</div>', unsafe_allow_html=True)
    # Additional features
    st.markdown("---")
    # Batch translation section
    _render_batch_section(tokenizer, model, device, max_length)
    # Footer (built from unindented lines so Markdown never treats the HTML
    # as an indented code block).
    st.markdown("---")
    st.markdown(
        '\n'
        '<div style="text-align: center; color: #666; padding: 2rem;">\n'
        '<p>Built with β€οΈ using Streamlit and Hugging Face Transformers</p>\n'
        '<p>Masoretic Hebrew to Targumic Aramaic Translation</p>\n'
        '<p>Model: johnlockejrr/marianmt-he2arc-targum</p>\n'
        '</div>\n',
        unsafe_allow_html=True,
    )
# Script entry point: build the page when the file is executed as a script.
if __name__ == "__main__":
    main()