Spaces:
Sleeping
Sleeping
Commit
·
1e0fc11
1
Parent(s):
2dac2ef
Added support for more image formats
Browse files
- Dockerfile +1 -24
- main.py +2 -2
Dockerfile
CHANGED
@@ -1,26 +1,3 @@
|
|
1 |
-
# # Use Python base image
|
2 |
-
# FROM python:3.9
|
3 |
-
|
4 |
-
# # Install ffmpeg
|
5 |
-
# RUN apt-get update && apt-get install -y ffmpeg
|
6 |
-
|
7 |
-
# # Set the working directory
|
8 |
-
# WORKDIR /app
|
9 |
-
|
10 |
-
# # Copy requirements and install dependencies
|
11 |
-
# COPY requirements.txt .
|
12 |
-
# RUN pip install --no-cache-dir -r requirements.txt
|
13 |
-
|
14 |
-
# # Copy all files into the container
|
15 |
-
# COPY . .
|
16 |
-
|
17 |
-
# # Expose the port FastAPI runs on
|
18 |
-
# EXPOSE 7860
|
19 |
-
|
20 |
-
# # Start the FastAPI app using Uvicorn
|
21 |
-
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
22 |
-
|
23 |
-
# Use Python base image
|
24 |
FROM python:3.9
|
25 |
|
26 |
# Install system dependencies: ffmpeg and Tesseract OCR
|
@@ -44,4 +21,4 @@ COPY . .
|
|
44 |
EXPOSE 7860
|
45 |
|
46 |
# Start the FastAPI app using Uvicorn
|
47 |
-
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
# Install system dependencies: ffmpeg and Tesseract OCR
|
|
|
21 |
EXPOSE 7860
|
22 |
|
23 |
# Start the FastAPI app using Uvicorn
|
24 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
main.py
CHANGED
@@ -78,7 +78,7 @@ async def extract_text(file_urls: List[str]):
|
|
78 |
text = extract_text_from_doc_bytes(file_bytes)
|
79 |
elif ext in [".ppt", ".pptx"]:
|
80 |
text = extract_text_from_ppt_bytes(file_bytes)
|
81 |
-
elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]:
|
82 |
text = extract_text_from_image_bytes(file_bytes)
|
83 |
else:
|
84 |
text = "Unsupported file format."
|
@@ -121,4 +121,4 @@ async def detect_plagiarism(request: AnswerRequest):
|
|
121 |
student_plagiarism_flags[i] = 1
|
122 |
student_plagiarism_flags[j] = 1
|
123 |
|
124 |
-
return {"plagiarism_flags": student_plagiarism_flags}
|
|
|
78 |
text = extract_text_from_doc_bytes(file_bytes)
|
79 |
elif ext in [".ppt", ".pptx"]:
|
80 |
text = extract_text_from_ppt_bytes(file_bytes)
|
81 |
+
elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".pbm", ".pgm", ".ppm", ".jp2", ".pcx", ".pnm"]:
|
82 |
text = extract_text_from_image_bytes(file_bytes)
|
83 |
else:
|
84 |
text = "Unsupported file format."
|
|
|
121 |
student_plagiarism_flags[i] = 1
|
122 |
student_plagiarism_flags[j] = 1
|
123 |
|
124 |
+
return {"plagiarism_flags": student_plagiarism_flags}
|