.clang-format
.gitignore
.isort.cfg
.pre-commit-config.yaml
CODE_OF_CONDUCT.md
CONTRIBUTING.md
DCO
LICENSE
MAINTAINERS.md
MANIFEST.in
README.md
SECURITY.md
format.sh
pyproject.toml
setup.py
.buildkite/clean.sh
.buildkite/cleanup.sh
.buildkite/drawing_wrapper.py
.buildkite/end-to-end-test.sh
.buildkite/install-env.sh
.buildkite/install-lmcache.sh
.buildkite/multi-round-qa.sh
.buildkite/pipeline-docker.yml
.buildkite/pipeline.yml
.buildkite/vllm-integration-tests.yml
.buildkite/correctness/1-mmlu.py
.buildkite/correctness/2-mmlu.py
.buildkite/correctness/README.md
.buildkite/correctness/create_report.py
.buildkite/correctness/deploy-1-vllm.sh
.buildkite/correctness/deploy-2-lmcache.sh
.buildkite/correctness/download-data.sh
.buildkite/correctness/env-setup.sh
.buildkite/correctness/pipeline.mmlu.yml
.buildkite/correctness/summarize_scores.py
.github/PULL_REQUEST_TEMPLATE.md
.github/dependabot.yml
.github/ISSUE_TEMPLATE/bug_report.md
.github/ISSUE_TEMPLATE/feature_request.md
.github/actions/free-disk-space/action.yml
.github/workflows/actionlint.dockerfile
.github/workflows/actionlint.yml
.github/workflows/build_doc.yml
.github/workflows/code_quality_checks.yml
.github/workflows/codeql.yml
.github/workflows/nightly_build.yml
.github/workflows/publish.yml
.github/workflows/scorecard.yml
.github/workflows/stale_bot.yml
.github/workflows/matchers/actionlint.json
.github/workflows/matchers/mypy.json
asset/logo.png
benchmarks/multi-round-qa/README.md
benchmarks/multi-round-qa/data_preprocessing.py
benchmarks/multi-round-qa/multi-round-qa.py
benchmarks/multi-round-qa/prepare_sharegpt_data.sh
benchmarks/multi-round-qa/requirements.txt
benchmarks/multi-round-qa/utils.py
benchmarks/rag/README.md
benchmarks/rag/launch_lmcache.sh
benchmarks/rag/launch_vllm.sh
benchmarks/rag/precompute.py
benchmarks/rag/rag.py
benchmarks/rag/requirements.txt
benchmarks/rag/utils.py
csrc/ac_dec.cu
csrc/ac_enc.cu
csrc/cachegen_kernels.cuh
csrc/cal_cdf.cu
csrc/cuda_compat.h
csrc/dispatch_utils.h
csrc/mem_kernels.cu
csrc/mem_kernels.cuh
csrc/pos_kernels.cu
csrc/pos_kernels.cuh
csrc/pybind.cpp
docker/Dockerfile
docker/Dockerfile.manylinux-cuda
docker/example_build.sh
docker/example_run.sh
docker/patch/factory.py
docker/patch/lmcache_connector.py
docker/patch/parallel_state.patch
docs/Makefile
docs/README.md
docs/make.bat
docs/source/.nojekyll
docs/source/conf.py
docs/source/index.rst
docs/source/_static/custom.css
docs/source/_static/custom.js
docs/source/_static/scroll.css
docs/source/_templates/custom.html
docs/source/api_reference/configurations.rst
docs/source/api_reference/dynamic_connector.rst
docs/source/api_reference/storage_backends.rst
docs/source/assets/InfiniStore-usage.png
docs/source/assets/lmcache-logo.png
docs/source/assets/lmcache-logo_crop.png
docs/source/assets/mooncake-store-preview.png
docs/source/community/blogs.rst
docs/source/community/meetings.rst
docs/source/developer_guide/contributing.rst
docs/source/developer_guide/docker_file.rst
docs/source/developer_guide/usage/index.rst
docs/source/developer_guide/usage/usage_stats_collection.rst
docs/source/disaggregated_prefill/shared_storage.rst
docs/source/disaggregated_prefill/nixl/1p1d.rst
docs/source/disaggregated_prefill/nixl/index.rst
docs/source/disaggregated_prefill/nixl/xpyd.rst
docs/source/getting_started/faq.rst
docs/source/getting_started/installation.rst
docs/source/getting_started/troubleshoot.rst
docs/source/getting_started/quickstart/disaggregated_prefill.rst
docs/source/getting_started/quickstart/index.rst
docs/source/getting_started/quickstart/offload_kv_cache.rst
docs/source/getting_started/quickstart/share_kv_cache.rst
docs/source/kv_cache/cpu_ram.rst
docs/source/kv_cache/gds.rst
docs/source/kv_cache/infinistore.rst
docs/source/kv_cache/local_storage.rst
docs/source/kv_cache/mooncake.rst
docs/source/kv_cache/redis.rst
docs/source/kv_cache/valkey.rst
docs/source/kv_cache/weka.rst
docs/source/kv_cache_management/check_finish.rst
docs/source/kv_cache_management/clear.rst
docs/source/kv_cache_management/compress.rst
docs/source/kv_cache_management/controller.rst
docs/source/kv_cache_management/lookup.rst
docs/source/kv_cache_management/move.rst
docs/source/kv_cache_management/persist.rst
docs/source/kv_cache_optimizations/blending.rst
docs/source/kv_cache_optimizations/compression/cachegen.rst
docs/source/kv_cache_optimizations/compression/index.rst
docs/source/production/docker_deployment.rst
docs/source/production/kubernetes_deployment.rst
examples/blend_kv/README.md
examples/blend_kv/batched_kv.py
examples/blend_kv/batched_tp_kv.py
examples/blend_kv/blend_kv.py
examples/blend_kv/chunk1.txt
examples/blend_kv/chunk2.txt
examples/blend_kv/example_blending.yaml
examples/blend_kv/online_kv.py
examples/blend_kv/tp_kv.py
examples/blend_kv_v1/README.md
examples/blend_kv_v1/blend.py
examples/cache_controller/README.md
examples/cache_controller/example.yaml
examples/cache_interface/README.md
examples/cache_interface/example.yaml
examples/disagg_prefill/README.md
examples/disagg_prefill/1p1d/README.md
examples/disagg_prefill/1p1d/disagg_example_nixl.sh
examples/disagg_prefill/1p1d/disagg_proxy_server_first_token_from_decoder.py
examples/disagg_prefill/1p1d/disagg_proxy_server_first_token_from_prefiller.py
examples/disagg_prefill/1p1d/disagg_vllm_launcher.sh
examples/disagg_prefill/1p1d/configs/lmcache-decoder-config.yaml
examples/disagg_prefill/1p1d/configs/lmcache-prefiller-config.yaml
examples/disagg_prefill/xp1d/README.md
examples/disagg_prefill/xp1d/disagg_example_xp1d.sh
examples/disagg_prefill/xp1d/disagg_proxy_server_first_token_from_decoder.py
examples/disagg_prefill/xp1d/disagg_proxy_server_first_token_from_prefiller.py
examples/disagg_prefill/xp1d/disagg_vllm_launcher.sh
examples/disagg_prefill/xp1d/configs/lmcache-decoder-config.yaml
examples/disagg_prefill/xp1d/configs/lmcache-prefiller-config.yaml
examples/frontend/README.md
examples/frontend/chat_session.py
examples/frontend/example.yaml
examples/frontend/ffmpeg.txt
examples/frontend/frontend.py
examples/kubernetes/health_probe.py
examples/kv_cache_calculator/README.md
examples/kv_cache_calculator/generate_config.py
examples/kv_cache_calculator/kv_cache_calculator.html
examples/kv_cache_calculator/modelconfig.json
examples/kv_cache_calculator/requirement.txt
examples/kv_cache_reuse/README.md
examples/kv_cache_reuse/local_backends/README.md
examples/kv_cache_reuse/local_backends/offload.py
examples/kv_cache_reuse/remote_backends/README.md
examples/kv_cache_reuse/remote_backends/external/README.md
examples/kv_cache_reuse/remote_backends/external/backend_type.yaml
examples/kv_cache_reuse/remote_backends/infinistore/README.md
examples/kv_cache_reuse/remote_backends/infinistore/backend_type.yaml
examples/kv_cache_reuse/remote_backends/mooncakestore/README.md
examples/kv_cache_reuse/remote_backends/mooncakestore/backend_type.yaml
examples/kv_cache_reuse/share_across_instances/README.md
examples/kv_cache_reuse/share_across_instances/centralized_sharing/README.md
examples/kv_cache_reuse/share_across_instances/centralized_sharing/example.yaml
examples/kv_cache_reuse/share_across_instances/p2p_sharing/README.md
examples/kv_cache_reuse/share_across_instances/p2p_sharing/example1.yaml
examples/kv_cache_reuse/share_across_instances/p2p_sharing/example2.yaml
examples/online_session/README.md
examples/online_session/bench_ttft_sweep.sh
examples/online_session/example.yaml
examples/online_session/ffmpeg.txt
examples/online_session/openai_chat_completion_client.py
examples/redis_lookup/README.md
examples/sgl_integration/README.md
examples/sgl_integration/lmcache_config.yaml
lmcache/__init__.py
lmcache/_version.py
lmcache/cache_engine.py
lmcache/config.py
lmcache/connections.py
lmcache/logging.py
lmcache/observability.py
lmcache/protocol.py
lmcache/usage_context.py
lmcache/utils.py
lmcache.egg-info/PKG-INFO
lmcache.egg-info/SOURCES.txt
lmcache.egg-info/dependency_links.txt
lmcache.egg-info/entry_points.txt
lmcache.egg-info/requires.txt
lmcache.egg-info/top_level.txt
lmcache/blend/__init__.py
lmcache/blend/executor.py
lmcache/blend/interfaces.py
lmcache/blend/retriever.py
lmcache/integration/__init__.py
lmcache/integration/sglang/__init__.py
lmcache/integration/sglang/sglang_adapter.py
lmcache/integration/sglang/utils.py
lmcache/integration/vllm/__init__.py
lmcache/integration/vllm/lmcache_connector_v1.py
lmcache/integration/vllm/lmcache_connector_v1_085.py
lmcache/integration/vllm/utils.py
lmcache/integration/vllm/vllm_adapter.py
lmcache/integration/vllm/vllm_v1_adapter.py
lmcache/server/__init__.py
lmcache/server/__main__.py
lmcache/server/server_storage_backend/__init__.py
lmcache/server/server_storage_backend/abstract_backend.py
lmcache/server/server_storage_backend/local_backend.py
lmcache/storage_backend/__init__.py
lmcache/storage_backend/abstract_backend.py
lmcache/storage_backend/hybrid_backend.py
lmcache/storage_backend/local_backend.py
lmcache/storage_backend/remote_backend.py
lmcache/storage_backend/connector/__init__.py
lmcache/storage_backend/connector/base_connector.py
lmcache/storage_backend/connector/lm_connector.py
lmcache/storage_backend/connector/redis_connector.py
lmcache/storage_backend/evictor/__init__.py
lmcache/storage_backend/evictor/base_evictor.py
lmcache/storage_backend/evictor/lru_evictor.py
lmcache/storage_backend/mem_pool/__init__.py
lmcache/storage_backend/mem_pool/base_pool.py
lmcache/storage_backend/mem_pool/local_pool.py
lmcache/storage_backend/serde/__init__.py
lmcache/storage_backend/serde/cachegen_basics.py
lmcache/storage_backend/serde/cachegen_decoder.py
lmcache/storage_backend/serde/cachegen_encoder.py
lmcache/storage_backend/serde/fast_serde.py
lmcache/storage_backend/serde/safe_serde.py
lmcache/storage_backend/serde/serde.py
lmcache/storage_backend/serde/torch_serde.py
lmcache/v1/__init__.py
lmcache/v1/cache_engine.py
lmcache/v1/cache_interface.py
lmcache/v1/config.py
lmcache/v1/gpu_connector.py
lmcache/v1/memory_management.py
lmcache/v1/protocol.py
lmcache/v1/token_database.py
lmcache/v1/api_server/__init__.py
lmcache/v1/api_server/__main__.py
lmcache/v1/cache_controller/__init__.py
lmcache/v1/cache_controller/controller_manager.py
lmcache/v1/cache_controller/executor.py
lmcache/v1/cache_controller/message.py
lmcache/v1/cache_controller/rpc_utils.py
lmcache/v1/cache_controller/worker.py
lmcache/v1/cache_controller/controllers/__init__.py
lmcache/v1/cache_controller/controllers/kv_controller.py
lmcache/v1/cache_controller/controllers/registration_controller.py
lmcache/v1/compute/__init__.py
lmcache/v1/compute/positional_encoding.py
lmcache/v1/compute/attention/__init__.py
lmcache/v1/compute/attention/abstract.py
lmcache/v1/compute/attention/flash_attn.py
lmcache/v1/compute/attention/metadata.py
lmcache/v1/compute/blend/__init__.py
lmcache/v1/compute/blend/blender.py
lmcache/v1/compute/blend/metadata.py
lmcache/v1/compute/blend/utils.py
lmcache/v1/compute/models/__init__.py
lmcache/v1/compute/models/llama.py
lmcache/v1/compute/models/utils.py
lmcache/v1/distributed_server/__init__.py
lmcache/v1/distributed_server/abstract_server.py
lmcache/v1/distributed_server/naive_server.py
lmcache/v1/lookup_client/__init__.py
lmcache/v1/lookup_client/abstract_client.py
lmcache/v1/lookup_client/factory.py
lmcache/v1/lookup_client/lmcache_lookup_client.py
lmcache/v1/lookup_client/mooncake_lookup_client.py
lmcache/v1/lookup_server/__init__.py
lmcache/v1/lookup_server/abstract_server.py
lmcache/v1/lookup_server/redis_server.py
lmcache/v1/server/__init__.py
lmcache/v1/server/__main__.py
lmcache/v1/server/utils.py
lmcache/v1/server/storage_backend/__init__.py
lmcache/v1/server/storage_backend/abstract_backend.py
lmcache/v1/server/storage_backend/local_backend.py
lmcache/v1/storage_backend/__init__.py
lmcache/v1/storage_backend/abstract_backend.py
lmcache/v1/storage_backend/gds_backend.py
lmcache/v1/storage_backend/local_cpu_backend.py
lmcache/v1/storage_backend/local_disk_backend.py
lmcache/v1/storage_backend/nixl_backend.py
lmcache/v1/storage_backend/remote_backend.py
lmcache/v1/storage_backend/storage_manager.py
lmcache/v1/storage_backend/weka_gds_backend.py
lmcache/v1/storage_backend/connector/__init__.py
lmcache/v1/storage_backend/connector/audit_adapter.py
lmcache/v1/storage_backend/connector/audit_connector.py
lmcache/v1/storage_backend/connector/base_connector.py
lmcache/v1/storage_backend/connector/blackhole_adapter.py
lmcache/v1/storage_backend/connector/blackhole_connector.py
lmcache/v1/storage_backend/connector/external_adapter.py
lmcache/v1/storage_backend/connector/fs_adapter.py
lmcache/v1/storage_backend/connector/fs_connector.py
lmcache/v1/storage_backend/connector/infinistore_adapter.py
lmcache/v1/storage_backend/connector/infinistore_connector.py
lmcache/v1/storage_backend/connector/instrumented_connector.py
lmcache/v1/storage_backend/connector/lm_adapter.py
lmcache/v1/storage_backend/connector/lm_connector.py
lmcache/v1/storage_backend/connector/mooncakestore_adapter.py
lmcache/v1/storage_backend/connector/mooncakestore_connector.py
lmcache/v1/storage_backend/connector/nixl_connector.py
lmcache/v1/storage_backend/connector/nixl_connector_v2.py
lmcache/v1/storage_backend/connector/nixl_utils.py
lmcache/v1/storage_backend/connector/redis_adapter.py
lmcache/v1/storage_backend/connector/redis_connector.py
lmcache/v1/storage_backend/evictor/__init__.py
lmcache/v1/storage_backend/evictor/base_evictor.py
lmcache/v1/storage_backend/evictor/lru_evictor.py
lmcache/v1/storage_backend/naive_serde/__init__.py
lmcache/v1/storage_backend/naive_serde/cachegen_basics.py
lmcache/v1/storage_backend/naive_serde/cachegen_decoder.py
lmcache/v1/storage_backend/naive_serde/cachegen_encoder.py
lmcache/v1/storage_backend/naive_serde/kivi_serde.py
lmcache/v1/storage_backend/naive_serde/naive_serde.py
lmcache/v1/storage_backend/naive_serde/serde.py
requirements/bench.txt
requirements/build.txt
requirements/common.txt
requirements/cuda.txt
requirements/docs.txt
requirements/lint.txt
requirements/test.txt
scripts/vllm-integration-tests.sh
tests/__init__.py
tests/conftest.py
tests/pytest.ini
tests/test_blend.py
tests/test_evictor.py
tests/test_observability.py
tests/test_protocol.py
tests/test_serde.py
tests/benchmarks/decompress.py
tests/benchmarks/prefetch.py
tests/benchmarks/test_benchmark.py
tests/benchmarks/transmit.py
tests/data/test_creation_from_file/disk.yaml
tests/data/test_creation_from_file/fail.yaml
tests/data/test_creation_from_file/hybrid.yaml
tests/data/test_creation_from_file/local.yaml
tests/data/test_creation_from_file/remote.yaml
tests/disagg/README.md
tests/disagg/test_nixl_cache_engine.py
tests/disagg/test_nixl_channel.py
tests/disagg/test_nixl_channel_v2.py
tests/disagg/test_nixl_pipe.py
tests/disagg/test_nixl_pipe_v2.py
tests/disagg/test_nixl_storage_backend.py
tests/v1/test_cache_engine.py
tests/v1/test_cache_interface.py
tests/v1/test_config.py
tests/v1/test_connector.py
tests/v1/test_gds.py
tests/v1/test_gpu_connector.py
tests/v1/test_mem_kernels.py
tests/v1/test_memory_management.py
tests/v1/test_pos_kernels.py
tests/v1/test_remote_mla_worker_id_as0.py
tests/v1/test_token_database.py
tests/v1/test_vllm_integration.py
tests/v1/test_weka.py
tests/v1/utils.py
tests/v1/data/gds.yaml
tests/v1/data/test_config.yaml
tests/v1/data/weka.yaml