# Page-capture residue from the Hugging Face Spaces UI ("Spaces: Running");
# not part of the program.
import re
from pathlib import Path

import gradio as gr
import jaconv
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForVision2Seq, AutoTokenizer
# Hugging Face Hub identifier of the checkpoint; the image processor, tokenizer
# and model below are all loaded from this same identifier, once, at import time.
pretrained_model_name_or_path = "jzhang533/manga-ocr-base-2025"

feature_extractor = AutoImageProcessor.from_pretrained(
    pretrained_model_name_or_path,
    use_fast=True,
)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
model = AutoModelForVision2Seq.from_pretrained(pretrained_model_name_or_path)
def post_process(text):
    """Normalize decoded OCR text.

    The steps run in this order:

    1. Remove all whitespace.
    2. Replace every "…" with "...".
    3. Rewrite each run of two or more "・" / "." characters as the same
       number of "." characters.
    4. Pass the result through ``jaconv.h2z`` with ``ascii`` and ``digit``
       enabled (half-width to full-width conversion).

    Args:
        text: The string produced by the tokenizer's ``decode``.

    Returns:
        The normalized string.
    """
    text = "".join(text.split())
    text = text.replace("…", "...")
    # Keep the run length unchanged; only the characters are unified to ".".
    text = re.sub(r"[・.]{2,}", lambda run: "." * len(run.group()), text)
    text = jaconv.h2z(text, ascii=True, digit=True)
    return text
def inference(img_or_path):
    """Run OCR on one image and return the recognized text.

    Args:
        img_or_path: Path to an image file (``str`` or ``pathlib.Path``) or an
            already-loaded ``PIL.Image.Image``.

    Returns:
        The decoded text after ``post_process`` normalization.

    Raises:
        ValueError: If ``img_or_path`` is neither a path nor a PIL image.
    """
    if isinstance(img_or_path, (str, Path)):
        # Image.open is lazy and keeps the file handle open. convert() loads
        # the pixel data into a new image, so the handle can be closed here.
        with Image.open(img_or_path) as opened:
            img = opened.convert("RGB")
    elif isinstance(img_or_path, Image.Image):
        # Normalize RGBA / grayscale / palette inputs to three channels.
        img = img_or_path.convert("RGB")
    else:
        raise ValueError(f"img_or_path must be a path or PIL.Image, instead got: {img_or_path}")

    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    # squeeze() drops size-1 axes and x[None] re-adds a leading one; presumably
    # this yields a (1, C, H, W) batch for a single image — TODO confirm.
    x = pixel_values.squeeze()
    # Keep the input on the same device as the model weights.
    x = model.generate(x[None].to(model.device), max_length=300)[0].cpu()
    x = tokenizer.decode(x, skip_special_tokens=True)
    x = post_process(x)
    return x
# Static text and sample inputs shown in the Gradio UI.
title = 'MangaOCR demo'

# Markdown rendered under the title.
description = '''
- This is derived from : <https://github.com/kha-white/manga-ocr>
- The model being used : <https://huggingface.co/jzhang533/manga-ocr-base-2025> (trained using scripts in [kha-white/manga-ocr](https://github.com/kha-white/manga-ocr) with several tweaks)
- Dataset being used to train the model: [manga109-s](http://www.manga109.org/en/download_s.html) and synthetic data.
'''

# One inner list per example row; each row supplies the single image input.
examples = [
    ['00.jpg'],
    ['01.jpg'],
    ['02.jpg'],
    ['03.jpg'],
    ['04.jpg'],
    ['05.jpg'],
    ['06.jpg'],
    ['07.jpg'],
]
# Build the web UI: one image input (handed to `inference` as a file path)
# and one text output, then start serving immediately.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(label="Upload Japanese Manga Image", type="filepath"),
    ],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
demo.launch()