# SPDX-License-Identifier: Apache-2.0

#!UseOBSRepositories
#!ExclusiveArch: aarch64 x86_64

# Unified BCI & Orchid Tags
#!BuildTag: containers/lmcache-vllm-openai:%%lmcache_vllm_openai_version%%%BUILD_FLAVOR%-%RELEASE%
#!BuildTag: containers/lmcache-vllm-openai:%%lmcache_vllm_openai_version%%%BUILD_FLAVOR%
#!BuildTag: containers/lmcache-vllm-openai:0%BUILD_FLAVOR%

#!ForceMultiVersion
#!BuildName: containers-lmcache-vllm-openai-0%BUILD_FLAVOR%
#!BuildVersion: %%lmcache_vllm_openai_version%%
#!BuildRelease: 0

# Stage sources: "micro" is the minimal runtime filesystem the final image is
# built from; "base" has zypper and is used to populate a chroot with packages.
FROM registry.suse.com/bci/bci-micro:15.6 AS micro
FROM registry.suse.com/bci/bci-base:15.6.47.5.6 AS base

# Install system packages into /chroot using a stage that has zypper; the
# final bci-micro image has no package manager of its own.
FROM base AS builder
COPY --from=micro / /chroot/
ENV ZYPP_CONF=/scratch-zypp.conf
ENV CHKSTAT_ALLOW_INSECURE_MODE_IF_NO_PROC=1
ENV CUDAVER=12-8
# Skip docs inside the chroot, install everything in one transaction, then
# drop zypper's caches and logs so they never reach the final image.
RUN printf '[main]\nrpm.install.excludedocs = yes\n' >/scratch-zypp.conf && \
    zypper --installroot /chroot -n --gpg-auto-import-keys in --no-recommends \
        cmake \
        cuda-cccl-${CUDAVER} \
        cuda-crt-${CUDAVER} \
        cuda-cudart-devel-${CUDAVER} \
        cuda-cuobjdump-${CUDAVER} \
        cuda-nvcc-${CUDAVER} \
        cuda-nvdisasm-${CUDAVER} \
        cuda-nvml-devel-${CUDAVER} \
        cuda-nvrtc-devel-${CUDAVER} \
        cuda-nvvm-${CUDAVER} \
        cuda-profiler-api-${CUDAVER} \
        cuda-toolkit-${CUDAVER}-config-common \
        cuda-toolkit-12-config-common \
        cuda-toolkit-config-common \
        findutils \
        gcc11 \
        gcc11-c++ \
        git-core \
        libcublas-devel-${CUDAVER} \
        libcufft-devel-${CUDAVER} \
        libcurand-devel-${CUDAVER} \
        libcusolver-devel-${CUDAVER} \
        libcusparse-devel-${CUDAVER} \
        libnvjitlink-devel-${CUDAVER} \
        nccl \
        python311-lmcache-cuda \
        python311-vllm-cuda \
        sed \
        wget && \
    zypper --installroot /chroot clean -a && \
    rm -rf /chroot/var/cache/zypp/* /chroot/var/log/zypp/*

# Drop byte-compiled Python caches from the chroot; only /chroot is copied
# into the final image, so this removal actually shrinks it.
RUN find /chroot/usr -name '*.pyc' -delete

# Final image: start from the minimal bci-micro filesystem and lay the fully
# populated chroot from the builder stage on top of it.
FROM micro
# Copy all files to the final image
COPY --from=builder /chroot/ /

# Define labels according to https://en.suse.org/Building_derived_containers
# The %%...%% and %...% placeholders below are substituted by the OBS build
# service; the labelprefix/endlabelprefix markers are parsed by OBS tooling,
# so keep the region between them unchanged.
# labelprefix=com.suse.application.lmcache-vllm-openai
LABEL org.opencontainers.image.authors=""
LABEL org.opencontainers.image.title="LMCache vLLM OpenAI"
LABEL org.opencontainers.image.description="A high-throughput, memory-efficient inference and serving engine for LLMs with integrated key-value cache reuse across requests"
LABEL org.opencontainers.image.version="%%lmcache_vllm_openai_version%%"
LABEL org.opencontainers.image.url="https://apps.rancher.io/applications/vllm"
LABEL org.opencontainers.image.created="%BUILDTIME%"
LABEL org.opencontainers.image.vendor="SUSE LLC"
LABEL org.opencontainers.image.source="%SOURCEURL%"
LABEL org.opencontainers.image.ref.name="%%lmcache_vllm_openai_version%%%BUILD_FLAVOR%-%RELEASE%"
LABEL org.opencontainers.image.base.digest="%BASE_IMAGE_DIGEST%"
LABEL org.opencontainers.image.base.name="%BASE_IMAGE_TAG%"
LABEL org.opensuse.reference="dp.apps.rancher.io/containers/lmcache-vllm-openai:%%lmcache_vllm_openai_version%%-%RELEASE%"
LABEL org.openbuildservice.disturl="%DISTURL%"
LABEL com.suse.eula="sle-eula"
LABEL com.suse.lifecycle-url="https://www.suse.com/lifecycle"
LABEL com.suse.image-type="application"
LABEL com.suse.release-stage="released"
# endlabelprefix

# Locale (key=value form; the legacy space-separated ENV syntax is deprecated).
ENV LANG=en_US.UTF-8
ENV LC_ALL=en_US.UTF-8

# CUDA 12.8 toolchain configuration (the builder stage installs CUDAVER=12-8).
ENV PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/cuda-12.8/bin"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES="compute,utility"
# Fixed: was /usr/local/cuda-2.8/lib (version typo) — nothing exists at that path.
ENV LD_LIBRARY_PATH="/usr/local/cuda-12.8/lib:/usr/local/cuda/lib64"
ENV CC=gcc-11
ENV CXX=g++-11
ENV CPLUS_INCLUDE_PATH="/usr/local/cuda-12.8/include"
ENV C_INCLUDE_PATH="/usr/local/cuda-12.8/include"
ENV NCCL_INCLUDE_DIR=/usr/include
# Fixed: was /usr/local/cuda-2.8/bin/nvcc (version typo).
ENV CUDACXX="/usr/local/cuda-12.8/bin/nvcc"
ENV CUDA_NVCC_FLAGS="-ccbin gcc-11"
ENV CUDAHOSTCXX="/usr/bin/g++-11"
# Fixed: was /usr/local/cuda-2.8/bin/nvcc (version typo).
# NOTE(review): CUDA_INSTALL_PATH conventionally names the toolkit root
# (/usr/local/cuda-12.8) rather than the nvcc binary — confirm what its
# consumers expect before changing the shape of the value.
ENV CUDA_INSTALL_PATH="/usr/local/cuda-12.8/bin/nvcc"
# Point Triton at the CUDA binaries/headers shipped in this image.
ENV TRITON_PTXAS_PATH="/usr/local/cuda-12.8/bin/ptxas"
ENV TRITON_CUOBJDUMP_PATH=/usr/local/cuda-12.8/bin/cuobjdump
ENV TRITON_NVDISASM_PATH=/usr/local/cuda-12.8/bin/nvdisasm
ENV TRITON_CUDACRT_PATH=/usr/local/cuda/include
ENV TRITON_CUDART_PATH=/usr/local/cuda/include
ENV TRITON_CUPTI_INCLUDE_PATH=/usr/local/cuda/include
ENV TRITON_CUPTI_LIB_PATH=/usr/local/cuda/lib64

# Provide a /opt/venv/bin layout by symlinking the system-wide python and
# lmcache entry points into it (no real virtualenv is created).
RUN mkdir -p /opt/venv/bin && \
    ln -sf /usr/bin/python3.11 /opt/venv/bin/python3 && \
    for tool in lmcache_v0_server lmcache_server lmcache_controller; do \
        ln -sf /usr/bin/${tool} /opt/venv/bin/${tool}; \
    done

# Triton 3.3.1 still invokes a pre-release "ptxas-blackwell" binary whose
# functionality was subsequently released as part of the regular ptxas, so
# point the name Triton expects at the ptxas shipped with CUDA 12.8.
RUN mkdir -p /usr/lib64/python3.11/site-packages/triton/backends/nvidia/bin && \
    ln -sf /usr/local/cuda-12.8/bin/ptxas /usr/lib64/python3.11/site-packages/triton/backends/nvidia/bin/ptxas-blackwell

WORKDIR /workspace

# Expose the packaged LMCache examples under /workspace.
# mkdir + cp are one logical step, so perform them in a single layer.
RUN mkdir -p /workspace/LMCache/examples && \
    cp -r /usr/lib64/python3.11/site-packages/lmcache/examples/* /workspace/LMCache/examples
# RUN cp /usr/lib64/python3.11/site-packages/lmcache/docker/patch/lmcache_connector.py /usr/lib64/python3.11/site-packages/vllm/distributed/kv_transfer/kv_connector/lmcache_connector.py


# Exec-form entrypoint: any arguments given to `docker run` are appended
# after "serve" (e.g. the model to serve and its flags).
ENTRYPOINT ["/usr/bin/vllm", "serve"]

