overrides: add fixups for vllm wheel mode

[vllm](https://github.com/vllm-project/vllm) is a popular server for LLM inference.

1. `autoPatchelfIgnoreMissingDeps` is set for packages that reference CUDA shared libraries, similar to the existing `xxx-cu11` packages.
2. Existing fixups that are not compatible with wheels are wrapped in `lib.optionalAttrs (!old.src.isWheel)` so that they are skipped for wheel builds.
This commit is contained in:
Yang, Bo 2024-03-25 15:47:39 -07:00 committed by Phillip Cloud
parent 7f5a70a677
commit b1d3256604
No known key found for this signature in database
GPG key ID: D908212070FD785E
6 changed files with 3658 additions and 4 deletions

View file

@ -288,6 +288,10 @@ lib.composeManyExtensions [
attr = "flit-core";
} else prev.argon2-cffi;
# Do not fail the build when autoPatchelf cannot resolve a shared-library
# dependency of this package (same treatment as the `*-cu11` packages).
autoawq-kernels = prev.autoawq-kernels.overridePythonAttrs (
  _: {
    autoPatchelfIgnoreMissingDeps = true;
  }
);
aws-cdk-asset-node-proxy-agent-v6 = prev.aws-cdk-asset-node-proxy-agent-v6.overridePythonAttrs (
old: lib.optionalAttrs (!(old.src.isWheel or false)) {
postPatch = ''
@ -385,6 +389,10 @@ lib.composeManyExtensions [
}
);
# Do not fail the build when autoPatchelf cannot resolve a shared-library
# dependency of this package (same treatment as the `*-cu11` packages).
bitsandbytes = prev.bitsandbytes.overridePythonAttrs (
  _: {
    autoPatchelfIgnoreMissingDeps = true;
  }
);
cairocffi = prev.cairocffi.overridePythonAttrs (
old: {
buildInputs = (old.buildInputs or [ ]) ++ [ final.pytest-runner ];
@ -624,6 +632,10 @@ lib.composeManyExtensions [
}
);
# Do not fail the build when autoPatchelf cannot resolve a shared-library
# dependency of this package (same treatment as the `*-cu11` packages).
cupy-cuda12x = prev.cupy-cuda12x.overridePythonAttrs (
  _: {
    autoPatchelfIgnoreMissingDeps = true;
  }
);
cyclonedx-python-lib = prev.cyclonedx-python-lib.overridePythonAttrs (old: {
propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [ final.setuptools ];
postPatch = ''
@ -1419,7 +1431,7 @@ lib.composeManyExtensions [
);
llvm = pkgs."llvmPackages_${llvm_version}".llvm or (throw "LLVM${llvm_version} has been removed from nixpkgs; upgrade llvmlite or use older nixpkgs");
in
{
lib.optionalAttrs (!(old.src.isWheel or false)) {
inherit llvm;
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.llvmlite.llvm ];
@ -1715,6 +1727,12 @@ lib.composeManyExtensions [
}
);
# Ignore unresolved shared-library dependencies only when numba is installed
# from a wheel; when `src` carries no `isWheel` attribute the flag is false.
numba = prev.numba.overridePythonAttrs (
  attrs:
  let
    fromWheel = attrs.src.isWheel or false;
  in
  {
    autoPatchelfIgnoreMissingDeps = fromWheel;
  }
);
netcdf4 = prev.netcdf4.overridePythonAttrs (
old: {
propagatedBuildInputs = (old.propagatedBuildInputs or [ ]) ++ [
@ -1798,7 +1816,7 @@ lib.composeManyExtensions [
];
});
nvidia-cudnn-cu12 = prev.nvidia-cudnn-cu11.overridePythonAttrs (attrs: {
nvidia-cudnn-cu12 = prev.nvidia-cudnn-cu12.overridePythonAttrs (attrs: {
propagatedBuildInputs = attrs.propagatedBuildInputs or [ ] ++ [
final.nvidia-cublas-cu12
];
@ -1810,7 +1828,7 @@ lib.composeManyExtensions [
];
});
nvidia-cusolver-cu12 = prev.nvidia-cusolver-cu11.overridePythonAttrs (attrs: {
nvidia-cusolver-cu12 = prev.nvidia-cusolver-cu12.overridePythonAttrs (attrs: {
propagatedBuildInputs = attrs.propagatedBuildInputs or [ ] ++ [
final.nvidia-cublas-cu12
];
@ -3626,7 +3644,7 @@ lib.composeManyExtensions [
};
in
{
lib.optionalAttrs (!old.src.isWheel or false) {
inherit src cargoDeps;
patchPhase = builtins.concatStringsSep "\n" [
@ -3954,6 +3972,31 @@ lib.composeManyExtensions [
# Append libxcrypt to pydantic's build inputs, starting from an empty list
# when the original attributes define none.
pydantic = prev.pydantic.overridePythonAttrs (
  attrs: {
    buildInputs = (attrs.buildInputs or [ ]) ++ [ pkgs.libxcrypt ];
  }
);
# vllm fixups.
#
# Wheel and source builds both ignore shared-library dependencies that
# autoPatchelf cannot resolve. The CUDA toolchain settings are added only for
# source builds (i.e. when `src` is not a wheel).
vllm = prev.vllm.overridePythonAttrs (old:
  {
    autoPatchelfIgnoreMissingDeps = true;
  }
  // lib.optionalAttrs (!(old.src.isWheel or false)) rec {
    # One prefix that merges the CUDA packages listed below, exposed to the
    # build as CUDA_HOME.
    CUDA_HOME = pkgs.symlinkJoin {
      name = "vllm-cuda-home";
      paths = [
        pkgs.cudaPackages.libcusparse
        pkgs.cudaPackages.libnvjitlink
        pkgs.cudaPackages.libcublas
        pkgs.cudaPackages.libcusolver
        pkgs.cudaPackages.cuda_nvcc
        pkgs.cudaPackages.cuda_cccl
        pkgs.cudaPackages.cuda_cudart
      ];
    };
    # `or [ ]` keeps evaluation from aborting when `old` does not define
    # nativeBuildInputs, matching the other overrides in this file.
    nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
      pkgs.which
    ];
    # `rec` is what lets this refer to the CUDA_HOME defined above.
    LD_LIBRARY_PATH = "${CUDA_HOME}/lib";
  }
);
# Do not fail the build when autoPatchelf cannot resolve a shared-library
# dependency of this package (same treatment as the `*-cu11` packages).
xformers = prev.xformers.overridePythonAttrs (
  _: {
    autoPatchelfIgnoreMissingDeps = true;
  }
);
# Prefer the prebuilt wheel for y-py.
y-py = prev.y-py.override { preferWheel = true; };

View file

@ -149,6 +149,7 @@ in
no-infinite-recur-on-missing-gitignores = callTest ./no-infinite-recur-on-missing-gitignores { };
pyzmq = callTest ./pyzmq { };
git-subdirectory-hook = callTest ./git-subdirectory-hook { };
pandas = callTest ./pandas { };
} // lib.optionalAttrs (!stdenv.isDarwin) {
# Editable tests fail on Darwin because of sandbox paths
pep600 = callTest ./pep600 { };
@ -156,6 +157,7 @@ in
# Fails because of missing inputs on darwin
text-generation-webui = callTest ./text-generation-webui { };
vllm-wheel = callTest ./vllm-wheel { };
# Cross tests fail on darwin for some strange reason:
# ERROR: MarkupSafe-2.0.1-cp39-cp39-linux_aarch64.whl is not a supported wheel on this platform.

View file

@ -0,0 +1,15 @@
# Test: build a Poetry environment from the adjacent pyproject.toml and
# poetry.lock with wheels preferred, then import vllm inside it and write the
# reported version to the derivation output.
{ poetry2nix, python311, runCommand }:
let
  vllmEnv = poetry2nix.mkPoetryEnv {
    python = python311;
    pyproject = ./pyproject.toml;
    poetrylock = ./poetry.lock;
    preferWheels = true;
  };
in
# HF_HOME is pointed at a fresh temporary directory before the import --
# presumably so nothing tries to use a cache under the sandbox's home
# directory; TODO confirm.
runCommand "vllm-wheel" { } ''
  export HF_HOME="$(mktemp -d)"
  ${vllmEnv}/bin/python -c 'import vllm; print(vllm.__version__)' > $out
''

3576
tests/vllm-wheel/poetry.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,18 @@
# Poetry project metadata; presumably the pyproject.toml consumed by the
# vllm-wheel test -- confirm against the test's default.nix.
[tool.poetry]
name = "vllm-wheel"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
# Runtime dependencies. torch and vllm are pinned to exact versions, the
# others use caret ranges.
[tool.poetry.dependencies]
python = "^3.10"
bitsandbytes = "^0.43.0"
torch = "2.1.2"
vllm = "0.3.3"
# See https://docs.vllm.ai/en/latest/quantization/auto_awq.html
autoawq = "^0.2.4"
# Build backend: poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

View file