try deploy on a10
commit ce73fe1ecf (parent 12df581e7f)
Dockerfile
@@ -0,0 +1,43 @@
# Use the official Python image as the base image.
FROM python:3.10-slim

# Declare a build argument with a default value (0 means GPU support is disabled).
ARG ENABLE_CUDA=0

# Set environment variables to optimize Python behavior in the container.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV ENABLE_CUDA=${ENABLE_CUDA}

# Set the working directory inside the container.
WORKDIR /app

# Install system dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libmagic1 \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file.
COPY requirements.txt .

# Conditionally install dependencies:
# - If ENABLE_CUDA=1, install all dependencies.
# - Otherwise, filter out GPU-specific dependencies.
RUN pip install --upgrade pip && \
    if [ "$ENABLE_CUDA" = "1" ]; then \
        pip install --no-cache-dir -r requirements.txt; \
    else \
        grep -v 'llama_index.readers.docling' requirements.txt > requirements_filtered.txt && \
        pip install --no-cache-dir -r requirements_filtered.txt; \
    fi

# Copy the entire project directory into the container.
COPY . .

# Expose the port on which the Gradio app will listen.
EXPOSE 7860

# Define the default command to run the Gradio application.
CMD ["python", "app.py"]
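Because the dependency set is selected by the ENABLE_CUDA build argument, both image variants come from the same Dockerfile. Note that the ENV line above propagates the build argument rather than hardcoding 1, so the CPU branch of the install step can actually run. A minimal build sketch (the image tags are illustrative, not from the commit):

    # GPU build: installs the full requirements.txt
    docker build --build-arg ENABLE_CUDA=1 -t docling-app:gpu .
    # CPU build: installs the grep-filtered requirements
    docker build --build-arg ENABLE_CUDA=0 -t docling-app:cpu .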
app.py
@@ -53,7 +53,6 @@ model = AutoModelForVision2Seq.from_pretrained("ds4sd/SmolDocling-256M-preview",
     #_attn_implementation="flash_attention_2"
 ).to("cuda")

-@spaces.GPU
 def model_inference(
     input_dict, history
 ):
@@ -151,4 +150,4 @@ demo = gr.ChatInterface(fn=model_inference, title="SmolDocling-256M: Ultra-compa
     cache_examples=False
 )

-demo.launch(debug=True)
+demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)
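Dropping `@spaces.GPU` makes sense here: that decorator comes from the `spaces` package used on Hugging Face ZeroGPU Spaces and serves no purpose when self-hosting on the A10. The launch change matters too: binding to 0.0.0.0 is what makes the published container port reachable, since with Gradio's default 127.0.0.1 bind the server only listens inside the container's network namespace. A quick check from the host, assuming the port mapping below:

    curl -I http://localhost:7860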
docker-compose.yml
@@ -0,0 +1,24 @@
docling_app:
  build:
    context: .
    args:
      ENABLE_CUDA: '1'
  container_name: gradio_app
  restart: unless-stopped
  ports:
    - "7860:7860"  # Gradio application port
  volumes:
    - ./app/services/weaviate_service.py:/gradio_app/weaviate_service.py
  environment:
    ENABLE_CUDA: '1'
    NVIDIA_VISIBLE_DEVICES: '3'
  deploy:
    resources:
      reservations:
        devices:
          - driver: nvidia
            device_ids: ['3']  # Specify GPU 3
            capabilities: [gpu]
  shm_size: 1g
  depends_on:
    - rag_app
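This snippet omits the top-level services: key and references a rag_app service not defined here, so it presumably merges into a larger compose file. Assuming the host has the NVIDIA Container Toolkit installed (required for the deploy.resources GPU reservation to work), the service can be brought up with:

    docker compose up -d --build docling_app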