viswa-chaitanya committed on
Commit
1e0fc11
·
1 Parent(s): 2dac2ef

added images format

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -24
  2. main.py +2 -2
Dockerfile CHANGED
@@ -1,26 +1,3 @@
1
- # # Use Python base image
2
- # FROM python:3.9
3
-
4
- # # Install ffmpeg
5
- # RUN apt-get update && apt-get install -y ffmpeg
6
-
7
- # # Set the working directory
8
- # WORKDIR /app
9
-
10
- # # Copy requirements and install dependencies
11
- # COPY requirements.txt .
12
- # RUN pip install --no-cache-dir -r requirements.txt
13
-
14
- # # Copy all files into the container
15
- # COPY . .
16
-
17
- # # Expose the port FastAPI runs on
18
- # EXPOSE 7860
19
-
20
- # # Start the FastAPI app using Uvicorn
21
- # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
22
-
23
- # Use Python base image
24
  FROM python:3.9
25
 
26
  # Install system dependencies: ffmpeg and Tesseract OCR
@@ -44,4 +21,4 @@ COPY . .
44
  EXPOSE 7860
45
 
46
  # Start the FastAPI app using Uvicorn
47
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  FROM python:3.9
2
 
3
  # Install system dependencies: ffmpeg and Tesseract OCR
 
21
  EXPOSE 7860
22
 
23
  # Start the FastAPI app using Uvicorn
24
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -78,7 +78,7 @@ async def extract_text(file_urls: List[str]):
78
  text = extract_text_from_doc_bytes(file_bytes)
79
  elif ext in [".ppt", ".pptx"]:
80
  text = extract_text_from_ppt_bytes(file_bytes)
81
- elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]:
82
  text = extract_text_from_image_bytes(file_bytes)
83
  else:
84
  text = "Unsupported file format."
@@ -121,4 +121,4 @@ async def detect_plagiarism(request: AnswerRequest):
121
  student_plagiarism_flags[i] = 1
122
  student_plagiarism_flags[j] = 1
123
 
124
- return {"plagiarism_flags": student_plagiarism_flags}
 
78
  text = extract_text_from_doc_bytes(file_bytes)
79
  elif ext in [".ppt", ".pptx"]:
80
  text = extract_text_from_ppt_bytes(file_bytes)
81
+ elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".webp", ".pbm", ".pgm", ".ppm", ".jp2", ".pcx", ".pnm"]:
82
  text = extract_text_from_image_bytes(file_bytes)
83
  else:
84
  text = "Unsupported file format."
 
121
  student_plagiarism_flags[i] = 1
122
  student_plagiarism_flags[j] = 1
123
 
124
+ return {"plagiarism_flags": student_plagiarism_flags}