Spaces:

viswatesting
/

plag-api

Sleeping

viswa-chaitanya committed on Mar 23

Commit

1e0fc11

1 Parent(s): 2dac2ef

added images format

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -1,26 +1,3 @@
-# # Use Python base image
-# FROM python:3.9
-# # Install ffmpeg
-# RUN apt-get update && apt-get install -y ffmpeg
-# # Set the working directory
-# WORKDIR /app
-# # Copy requirements and install dependencies
-# COPY requirements.txt .
-# RUN pip install --no-cache-dir -r requirements.txt
-# # Copy all files into the container
-# COPY . .
-# # Expose the port FastAPI runs on
-# EXPOSE 7860
-# # Start the FastAPI app using Uvicorn
-# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
-# Use Python base image
 FROM python:3.9
 # Install system dependencies: ffmpeg and Tesseract OCR
@@ -44,4 +21,4 @@ COPY . .
 EXPOSE 7860
 # Start the FastAPI app using Uvicorn
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 FROM python:3.9
 # Install system dependencies: ffmpeg and Tesseract OCR
 EXPOSE 7860
 # Start the FastAPI app using Uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py CHANGED Viewed

@@ -78,7 +78,7 @@ async def extract_text(file_urls: List[str]):
                 text = extract_text_from_doc_bytes(file_bytes)
             elif ext in [".ppt", ".pptx"]:
                 text = extract_text_from_ppt_bytes(file_bytes)
-            elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]:
                 text = extract_text_from_image_bytes(file_bytes)
             else:
                 text = "Unsupported file format."
@@ -121,4 +121,4 @@ async def detect_plagiarism(request: AnswerRequest):
                 student_plagiarism_flags[i] = 1
                 student_plagiarism_flags[j] = 1
-    return {"plagiarism_flags": student_plagiarism_flags}

                 text = extract_text_from_doc_bytes(file_bytes)
             elif ext in [".ppt", ".pptx"]:
                 text = extract_text_from_ppt_bytes(file_bytes)
+            elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".pbm", ".pgm", ".ppm", ".jp2", ".pcx", ".pnm"]:
                 text = extract_text_from_image_bytes(file_bytes)
             else:
                 text = "Unsupported file format."
                 student_plagiarism_flags[i] = 1
                 student_plagiarism_flags[j] = 1
+    return {"plagiarism_flags": student_plagiarism_flags}