Spaces:

viswatesting
/

plag-api

Sleeping

viswa-chaitanya committed on Mar 23

Commit

2dac2ef

1 Parent(s): c1089dd

image error

Files changed (3) hide show

Dockerfile CHANGED Viewed

@@ -1,8 +1,34 @@
 # Use Python base image
 FROM python:3.9
-# Install ffmpeg
-RUN apt-get update && apt-get install -y ffmpeg
 # Set the working directory
 WORKDIR /app
@@ -18,4 +44,4 @@ COPY . .
 EXPOSE 7860
 # Start the FastAPI app using Uvicorn
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

+# # Use Python base image
+# FROM python:3.9
+# # Install ffmpeg
+# RUN apt-get update && apt-get install -y ffmpeg
+# # Set the working directory
+# WORKDIR /app
+# # Copy requirements and install dependencies
+# COPY requirements.txt .
+# RUN pip install --no-cache-dir -r requirements.txt
+# # Copy all files into the container
+# COPY . .
+# # Expose the port FastAPI runs on
+# EXPOSE 7860
+# # Start the FastAPI app using Uvicorn
+# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 # Use Python base image
 FROM python:3.9
+# Install system dependencies: ffmpeg and Tesseract OCR
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    tesseract-ocr \
+    libtesseract-dev \
+    && rm -rf /var/lib/apt/lists/*
 # Set the working directory
 WORKDIR /app
 EXPOSE 7860
 # Start the FastAPI app using Uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py CHANGED Viewed

@@ -42,11 +42,19 @@ def extract_text_from_ppt_bytes(file_bytes: bytes) -> str:
     )
     return clean_text(text)
 def extract_text_from_image_bytes(file_bytes: bytes) -> str:
     image = Image.open(io.BytesIO(file_bytes))
     text = pytesseract.image_to_string(image)
     return clean_text(text)
 def download_file(url: str) -> bytes:
     response = requests.get(url)
     response.raise_for_status()

     )
     return clean_text(text)
+# def extract_text_from_image_bytes(file_bytes: bytes) -> str:
+#     image = Image.open(io.BytesIO(file_bytes))
+#     text = pytesseract.image_to_string(image)
+#     return clean_text(text)
 def extract_text_from_image_bytes(file_bytes: bytes) -> str:
+    pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
     image = Image.open(io.BytesIO(file_bytes))
     text = pytesseract.image_to_string(image)
     return clean_text(text)
 def download_file(url: str) -> bytes:
     response = requests.get(url)
     response.raise_for_status()

requirements.txt CHANGED Viewed

@@ -1,12 +1,12 @@
 fastapi[standard]
 uvicorn
-pydantic
-numpy
-faiss-cpu
-sentence-transformers
-pymupdf
 requests
-python-docx
-python-pptx
-pytesseract
-pillow

 fastapi[standard]
 uvicorn
 requests
+python-multipart
+pytesseract
+pillow
+pymupdf
+python-docx
+python-pptx
+numpy
+faiss-cpu
+sentence-transformers