viswa-chaitanya committed on
Commit
2dac2ef
·
1 Parent(s): c1089dd

image error

Browse files
Files changed (3) hide show
  1. Dockerfile +29 -3
  2. main.py +8 -0
  3. requirements.txt +9 -9
Dockerfile CHANGED
@@ -1,8 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Use Python base image
2
  FROM python:3.9
3
 
4
- # Install ffmpeg
5
- RUN apt-get update && apt-get install -y ffmpeg
 
 
 
 
6
 
7
  # Set the working directory
8
  WORKDIR /app
@@ -18,4 +44,4 @@ COPY . .
18
  EXPOSE 7860
19
 
20
  # Start the FastAPI app using Uvicorn
21
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # # Use Python base image
2
+ # FROM python:3.9
3
+
4
+ # # Install ffmpeg
5
+ # RUN apt-get update && apt-get install -y ffmpeg
6
+
7
+ # # Set the working directory
8
+ # WORKDIR /app
9
+
10
+ # # Copy requirements and install dependencies
11
+ # COPY requirements.txt .
12
+ # RUN pip install --no-cache-dir -r requirements.txt
13
+
14
+ # # Copy all files into the container
15
+ # COPY . .
16
+
17
+ # # Expose the port FastAPI runs on
18
+ # EXPOSE 7860
19
+
20
+ # # Start the FastAPI app using Uvicorn
21
+ # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
22
+
23
  # Use Python base image
24
  FROM python:3.9
25
 
26
+ # Install system dependencies: ffmpeg and Tesseract OCR
27
+ RUN apt-get update && apt-get install -y \
28
+ ffmpeg \
29
+ tesseract-ocr \
30
+ libtesseract-dev \
31
+ && rm -rf /var/lib/apt/lists/*
32
 
33
  # Set the working directory
34
  WORKDIR /app
 
44
  EXPOSE 7860
45
 
46
  # Start the FastAPI app using Uvicorn
47
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py CHANGED
@@ -42,11 +42,19 @@ def extract_text_from_ppt_bytes(file_bytes: bytes) -> str:
42
  )
43
  return clean_text(text)
44
 
 
 
 
 
 
45
  def extract_text_from_image_bytes(file_bytes: bytes) -> str:
 
 
46
  image = Image.open(io.BytesIO(file_bytes))
47
  text = pytesseract.image_to_string(image)
48
  return clean_text(text)
49
 
 
50
  def download_file(url: str) -> bytes:
51
  response = requests.get(url)
52
  response.raise_for_status()
 
42
  )
43
  return clean_text(text)
44
 
45
+ # def extract_text_from_image_bytes(file_bytes: bytes) -> str:
46
+ # image = Image.open(io.BytesIO(file_bytes))
47
+ # text = pytesseract.image_to_string(image)
48
+ # return clean_text(text)
49
+
50
  def extract_text_from_image_bytes(file_bytes: bytes) -> str:
51
+ pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
52
+
53
  image = Image.open(io.BytesIO(file_bytes))
54
  text = pytesseract.image_to_string(image)
55
  return clean_text(text)
56
 
57
+
58
  def download_file(url: str) -> bytes:
59
  response = requests.get(url)
60
  response.raise_for_status()
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
  fastapi[standard]
2
  uvicorn
3
- pydantic
4
- numpy
5
- faiss-cpu
6
- sentence-transformers
7
- pymupdf
8
  requests
9
- python-docx
10
- python-pptx
11
- pytesseract
12
- pillow
 
 
 
 
 
 
1
  fastapi[standard]
2
  uvicorn
 
 
 
 
 
3
  requests
4
+ python-multipart
5
+ pytesseract
6
+ pillow
7
+ pymupdf
8
+ python-docx
9
+ python-pptx
10
+ numpy
11
+ faiss-cpu
12
+ sentence-transformers