dilaksh06 committed on
Commit
3a2b5e7
·
1 Parent(s): 61abcd2
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -8,9 +8,9 @@ MODEL_NAME = "ai4bharat/indictrans2-indic-indic-1B"
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
10
 
11
- # Supported languages: full name -> 3-letter code (IndicTrans2)
12
  LANGUAGES = {
13
- "Assamese": "asm",
14
  "Bengali": "ben",
15
  "Gujarati": "guj",
16
  "Hindi": "hin",
@@ -34,11 +34,9 @@ def translate(text: str, src_lang_name: str, tgt_lang_name: str) -> str:
34
  src_lang = LANGUAGES[src_lang_name]
35
  tgt_lang = LANGUAGES[tgt_lang_name]
36
 
37
- # Format input as required by IndicTrans2
38
  formatted_text = f"{src_lang}>>{tgt_lang} {text}"
39
  inputs = tokenizer(formatted_text, return_tensors="pt")
40
 
41
- # Generate translations
42
  output_tokens = model.generate(**inputs, max_length=512)
43
  translation = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
44
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
10
 
11
+ # Supported languages: full name -> code
12
  LANGUAGES = {
13
+ "Assamese": "asm",
14
  "Bengali": "ben",
15
  "Gujarati": "guj",
16
  "Hindi": "hin",
 
34
  src_lang = LANGUAGES[src_lang_name]
35
  tgt_lang = LANGUAGES[tgt_lang_name]
36
 
 
37
  formatted_text = f"{src_lang}>>{tgt_lang} {text}"
38
  inputs = tokenizer(formatted_text, return_tensors="pt")
39
 
 
40
  output_tokens = model.generate(**inputs, max_length=512)
41
  translation = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
42