demo-updated / pdf_manager.py
Kazel's picture
logging
d901124
from pdf2image import convert_from_path
import os
import shutil
class PdfManager:
    """Render PDF pages to PNG files under a per-document output folder."""

    def __init__(self):
        """Choose a writable base output directory and make sure it exists."""
        # Directory containing this module; only used for the fallback path.
        app_dir = os.path.dirname(os.path.abspath(__file__))

        # Prefer /tmp when it exists and is writable (intended for Hugging
        # Face Spaces); otherwise fall back to a "pages" folder next to this
        # module.
        if os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
            self.base_output_dir = "/tmp/pages"
            print(f"βœ… Using /tmp directory for Hugging Face Spaces: {self.base_output_dir}")
        else:
            self.base_output_dir = os.path.join(app_dir, "pages")
            print(f"βœ… Using relative path: {self.base_output_dir}")

        # Ensure the base directory exists.
        os.makedirs(self.base_output_dir, exist_ok=True)

    def clear_and_recreate_dir(self, output_folder):
        """Leave ``output_folder`` as an existing, empty directory.

        Any previous content of the folder is deleted.

        Args:
            output_folder: Path of the directory to reset.
        """
        print(f"Clearing output folder {output_folder}")

        if os.path.exists(output_folder):
            shutil.rmtree(output_folder)

        # Always recreate: previously an existing folder was removed and left
        # missing, so the subsequent image writes had nowhere to go.
        os.makedirs(output_folder, exist_ok=True)

    def save_images(self, id, pdf_path, max_pages, pages: list[int] = None) -> list[str]:
        """Convert a PDF to PNG page images and return the saved file paths.

        The images are written to ``<base_output_dir>/<id>/page_<n>.png``,
        where ``n`` is the 1-based page number. The target folder is emptied
        before saving.

        Args:
            id: Name of the sub-folder (under the base output directory) that
                receives the images.
            pdf_path: Path of the PDF file to convert.
            max_pages: Maximum number of pages to save; a falsy value (``None``
                or ``0``) means no limit.
            pages: Optional zero-based page indices to keep; a falsy value
                (``None`` or an empty list) keeps every page.

        Returns:
            Paths of the PNG files that were actually written, in page order.
        """
        output_folder = os.path.join(self.base_output_dir, id)

        images = convert_from_path(pdf_path)

        print(f"Saving images from {pdf_path} to {output_folder}. Max pages: {max_pages}")

        self.clear_and_recreate_dir(output_folder)

        # Build the lookup once instead of scanning the list for every page.
        wanted = set(pages) if pages else None

        saved_paths = []
        for i, image in enumerate(images):
            if max_pages and len(saved_paths) >= max_pages:
                break

            if wanted is not None and i not in wanted:
                continue

            full_save_path = os.path.join(output_folder, f"page_{i + 1}.png")
            image.save(full_save_path, "PNG")
            # Record the real path: when pages are filtered, the saved file
            # numbers are not simply 1..N.
            saved_paths.append(full_save_path)

        return saved_paths