import csv
import io
import json
import time
from typing import Optional

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
# Browser-tab title/icon and overall page layout for the app.
st.set_page_config(
    page_title="Hebrew-Aramaic Translator",
    page_icon="π",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
|
|
|
# Inject the app-wide stylesheet. The classes defined here are referenced by
# the raw-HTML snippets emitted elsewhere in this file (e.g. "main-header",
# "sub-header", "input-area", "output-area").
# `<style>` is kept at column 0 so Markdown treats the whole literal as a raw
# HTML block rather than as an indented code block.
st.markdown("""
<style>
    .main-header {
        font-size: 3rem;
        font-weight: 700;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        text-align: center;
        margin-bottom: 2rem;
    }

    .sub-header {
        font-size: 1.2rem;
        color: #666;
        text-align: center;
        margin-bottom: 3rem;
    }

    .translation-box {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        padding: 2rem;
        border-radius: 15px;
        box-shadow: 0 8px 32px rgba(0,0,0,0.1);
        margin: 1rem 0;
    }

    .input-area {
        background: white;
        border-radius: 10px;
        padding: 1.5rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.05);
    }

    .output-area {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        padding: 1.5rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.1);
    }

    .direction-selector {
        background: white;
        border-radius: 10px;
        padding: 1rem;
        box-shadow: 0 4px 16px rgba(0,0,0,0.05);
        margin-bottom: 1rem;
    }

    .stButton > button {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        border-radius: 25px;
        padding: 0.75rem 2rem;
        font-weight: 600;
        transition: all 0.3s ease;
    }

    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 8px 25px rgba(102, 126, 234, 0.4);
    }

    .model-info {
        background: #f8f9fa;
        border-radius: 10px;
        padding: 1rem;
        margin: 1rem 0;
        border-left: 4px solid #667eea;
    }
</style>
""", unsafe_allow_html=True)
|
|
|
@st.cache_resource(show_spinner="Loading translation model...")
def _load_model_cached(model_name: str):
    """Download/load the tokenizer and model once and keep them cached.

    Any exception is allowed to propagate so that a failed attempt is not
    stored in the resource cache and the next script run retries the load.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Prefer the GPU when one is visible to torch, otherwise stay on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()  # inference only

    return tokenizer, model, device


def load_model():
    """Load the Hugging Face model and tokenizer with caching.

    Returns:
        A ``(tokenizer, model, device)`` tuple, where ``device`` is ``"cuda"``
        or ``"cpu"``. If loading fails, the error is shown with ``st.error``
        and ``(None, None, None)`` is returned.

    The failure tuple is produced here, outside the cached helper, so it is
    never cached: a transient failure does not stick until the cache is
    cleared.
    """
    model_name = "johnlockejrr/opus-mt-arc-heb"

    try:
        return _load_model_cached(model_name)
    except Exception as e:  # UI boundary: report instead of crashing the page
        st.error(f"Error loading model: {str(e)}")
        return None, None, None
|
|
|
def translate_text(text: str, direction: str, tokenizer, model, device: str, max_length: int = 512) -> Optional[str]:
    """Translate text using the loaded model.

    Args:
        text: Source text. ``None``, empty, or whitespace-only input is
            rejected without calling the model.
        direction: ``"Hebrew to Aramaic"`` selects the ``<he>`` prefix; any
            other value selects ``<ar>``.
        tokenizer: Callable tokenizer that returns a batch supporting
            ``.to(device)`` and provides ``decode``.
        model: Seq2seq model exposing ``generate``.
        device: Device the tokenized inputs are moved to.
        max_length: Used both as the input truncation limit and as the
            generation ``max_length``.

    Returns:
        The decoded translation, or ``None`` when the input is blank or when
        tokenization/generation raised (the error is shown via ``st.error``).
    """
    # `not text` also covers None, which would otherwise raise on .strip().
    if not text or not text.strip():
        return None

    # NOTE(review): assumes the checkpoint was trained with these language-tag
    # prefixes — confirm against the model card.
    prefix = "<he>" if direction == "Hebrew to Aramaic" else "<ar>"
    input_text = f"{prefix} {text}"

    try:
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
            padding=True,
        ).to(device)

        # Inference only: no gradients needed.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=max_length,
                num_beams=4,
                length_penalty=0.6,
                early_stopping=True,
                do_sample=False,
            )

        # Only the first generated sequence is decoded.
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:  # UI boundary: surface the failure to the user
        st.error(f"Translation error: {str(e)}")
        return None
|
|
|
def _clear_all() -> None:
    """Sidebar "Clear All" callback: empty the text input and the uploader."""
    st.session_state["input_text"] = ""
    # A widget with a different key is a different widget, so bumping this
    # counter makes the file uploader come back empty on the next run.
    st.session_state["uploader_generation"] = (
        st.session_state.get("uploader_generation", 0) + 1
    )


def _results_to_csv(results: list) -> str:
    """Serialise batch results (dicts with 'original'/'translation') to CSV text."""
    buffer = io.StringIO()
    # Header written verbatim so the first line of the export is unchanged.
    buffer.write("Samaritan Hebrew,Samaritan Aramaic\n")
    # csv.writer doubles embedded quotes; QUOTE_ALL keeps every field quoted
    # as the export always did.
    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerows((row["original"], row["translation"]) for row in results)
    return buffer.getvalue()


def main():
    """Render the translator page.

    Layout, top to bottom: header, sidebar (output-length slider, model
    information, "Clear All"), a two-column single-text translator, a batch
    translator driven by an uploaded .txt file (one text per line) with CSV
    export, and a footer.

    NOTE(review): several labels below contain characters such as "π" or
    "βοΈ" that look like emoji whose UTF-8 bytes were decoded with the wrong
    codec. They are kept verbatim here; restore them from a clean copy of the
    file. The only one repaired is the "Translation complete!" status, whose
    literal had been split across two lines.
    """
    st.markdown('<h1 class="main-header">π Samaritan Hebrew-Aramaic Translator</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">Powered by the johnlockejrr/opus-mt-arc-heb model</p>', unsafe_allow_html=True)

    tokenizer, model, device = load_model()

    # load_model signals failure with None placeholders.
    if tokenizer is None or model is None:
        st.error("Failed to load the translation model. Please check your internet connection and try again.")
        return

    with st.sidebar:
        st.markdown("### βοΈ Settings")

        max_length = st.slider(
            "Maximum Output Length",
            min_value=64,
            max_value=512,
            value=256,
            step=32,
            help="Maximum length of the generated translation",
        )

        st.markdown("### π Model Information")
        st.markdown("**Model:** johnlockejrr/opus-mt-arc-heb")
        st.markdown(f"**Device:** {device.upper()}")
        st.markdown(f"**Tokenizer:** {tokenizer.__class__.__name__}")
        st.markdown(f"**Model Type:** {model.__class__.__name__}")
        st.markdown("**Direction:** Samaritan Hebrew β Samaritan Aramaic")

        # A bare rerun leaves widget values untouched, so the reset is done
        # in a callback; the click itself already triggers the rerun.
        st.button("ποΈ Clear All", on_click=_clear_all)

    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown('<div class="input-area">', unsafe_allow_html=True)
        st.markdown("### π Input Text")

        input_text = st.text_area(
            "Enter Samaritan Hebrew text to translate",
            height=200,
            placeholder="Enter your Samaritan Hebrew text here...",
            help="Type or paste the Samaritan Hebrew text you want to translate to Samaritan Aramaic",
            key="input_text",  # lets _clear_all reset the field
        )

        translate_button = st.button(
            "π Translate to Samaritan Aramaic",
            type="primary",
            use_container_width=True,
        )
        st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        st.markdown('<div class="output-area">', unsafe_allow_html=True)
        st.markdown("### π― Samaritan Aramaic Translation")

        if translate_button and input_text.strip():
            with st.spinner("Translating to Samaritan Aramaic..."):
                # Short pause before inference; presumably cosmetic so the
                # spinner is noticeable — confirm it is still wanted.
                time.sleep(0.5)

                translation = translate_text(
                    input_text,
                    "Hebrew to Aramaic",
                    tokenizer,
                    model,
                    device,
                    max_length,
                )

            if translation:
                st.markdown("**Samaritan Aramaic:**")
                st.code(translation, language=None)
            else:
                st.error("Translation failed. Please try again.")
        else:
            st.markdown("*Samaritan Aramaic translation will appear here*")
        st.markdown('</div>', unsafe_allow_html=True)

    st.markdown("---")

    st.markdown("### π Batch Translation")
    st.markdown("Upload a text file with multiple Samaritan Hebrew lines to translate them all to Samaritan Aramaic.")

    uploaded_file = st.file_uploader(
        "Choose a text file",
        type=['txt'],
        help="Upload a .txt file with one Samaritan Hebrew text per line",
        key=f"batch_upload_{st.session_state.get('uploader_generation', 0)}",
    )

    if uploaded_file is not None:
        try:
            # getvalue() returns the whole payload regardless of the stream
            # position; utf-8-sig drops a leading BOM so it is not fed to the
            # model as part of the first line.
            content = uploaded_file.getvalue().decode('utf-8-sig')
            lines = [line.strip() for line in content.splitlines() if line.strip()]

            if lines:
                st.success(f"π Loaded {len(lines)} lines from {uploaded_file.name}")

                # NOTE(review): st.button is only True on the run triggered by
                # its click, so these results are not re-rendered on the next
                # interaction — consider keeping them in st.session_state.
                if st.button("π Translate All to Samaritan Aramaic", type="primary"):
                    st.markdown("### π Batch Translation Results")

                    progress_bar = st.progress(0)
                    status_text = st.empty()
                    total = len(lines)

                    results = []
                    for position, line in enumerate(lines, start=1):
                        status_text.text(f"Translating line {position}/{total}: {line[:50]}...")

                        translation = translate_text(
                            line,
                            "Hebrew to Aramaic",
                            tokenizer,
                            model,
                            device,
                            max_length,
                        )

                        results.append({
                            'original': line,
                            'translation': translation or "Translation failed",
                        })

                        progress_bar.progress(position / total)

                    status_text.text("✅ Translation complete!")

                    for position, result in enumerate(results, start=1):
                        with st.expander(f"Line {position}: {result['original'][:50]}..."):
                            st.markdown(f"**Samaritan Hebrew:** {result['original']}")
                            st.markdown(f"**Samaritan Aramaic:** {result['translation']}")

                    st.download_button(
                        label="π₯ Download Results as CSV",
                        data=_results_to_csv(results),
                        file_name="samaritan_translations.csv",
                        mime="text/csv",
                    )

        except Exception as e:  # UI boundary: report instead of crashing the page
            st.error(f"Error reading file: {str(e)}")

    st.markdown("---")
    # Footer HTML is assembled from unindented lines so Markdown does not
    # treat it as an indented code block.
    st.markdown(
        '<div style="text-align: center; color: #666; padding: 2rem;">\n'
        '<p>Built with β€οΈ using Streamlit and Hugging Face Transformers</p>\n'
        '<p>Samaritan Hebrew to Samaritan Aramaic Translation</p>\n'
        '<p>Model: johnlockejrr/opus-mt-arc-heb</p>\n'
        '</div>',
        unsafe_allow_html=True,
    )
|
|
|
# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()