Skip to content

Commit a20f51f

Browse files
committed
Fixed missing NVIDIA driver on hosts with 2 GPUs, where `ubuntu-drivers autoinstall` fails to provide nvidia-smi; fall back to installing nvidia-driver-535 explicitly
1 parent ad527f2 commit a20f51f

File tree

1 file changed

+37
-6
lines changed

1 file changed

+37
-6
lines changed

tfgrid3/openwebui/gpu-setup.sh

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ fi
2020
# Install prerequisites and dependencies
2121
install_dependencies() {
2222
log "Installing prerequisites and dependencies..."
23-
23+
2424
# Update package lists
2525
apt update || error "Failed to update package lists"
2626

@@ -67,12 +67,20 @@ detect_gpu_type() {
6767
# Setup NVIDIA
6868
setup_nvidia() {
6969
log "Setting up NVIDIA GPU environment..."
70-
70+
7171
# Install NVIDIA drivers if not present
7272
if ! command -v nvidia-smi &>/dev/null; then
73-
log "Installing NVIDIA drivers..."
73+
log "Installing NVIDIA drivers using ubuntu-drivers autoinstall..."
7474
apt install -y ubuntu-drivers-common
7575
ubuntu-drivers autoinstall
76+
77+
# Check if nvidia-smi is available after autoinstall
78+
if ! command -v nvidia-smi &>/dev/null; then
79+
log "Autoinstall didn't provide nvidia-smi, installing specific driver 535..."
80+
add-apt-repository -y ppa:graphics-drivers/ppa
81+
apt update
82+
apt install -y nvidia-driver-535 nvidia-utils-535
83+
fi
7684
fi
7785

7886
# Setup NVIDIA Container Toolkit
@@ -86,10 +94,33 @@ setup_nvidia() {
8694
apt install -y nvidia-container-toolkit
8795
nvidia-ctk runtime configure --runtime=docker
8896

97+
# Configure Docker for NVIDIA runtime
98+
log "Configuring Docker for NVIDIA runtime..."
99+
mkdir -p /etc/docker
100+
cat > /etc/docker/daemon.json <<EOF
101+
{
102+
"default-runtime": "nvidia",
103+
"runtimes": {
104+
"nvidia": {
105+
"path": "nvidia-container-runtime",
106+
"runtimeArgs": []
107+
}
108+
}
109+
}
110+
EOF
111+
89112
# Restart Docker
90113
systemctl restart docker
91114
sleep 5
92115

116+
# Final check for nvidia-smi
117+
if ! command -v nvidia-smi &>/dev/null; then
118+
warn "nvidia-smi still not available after driver installation!"
119+
warn "This may require a system reboot to fully activate drivers"
120+
else
121+
log "NVIDIA drivers successfully installed and nvidia-smi is available"
122+
fi
123+
93124
# Install CUDA if not present
94125
if ! command -v nvcc &>/dev/null; then
95126
log "Installing CUDA toolkit..."
@@ -100,7 +131,7 @@ setup_nvidia() {
100131
# Setup AMD
101132
setup_amd() {
102133
log "Setting up AMD GPU environment..."
103-
134+
104135
# Install AMD GPU drivers and tools
105136
apt install -y \
106137
linux-headers-generic \
@@ -141,7 +172,7 @@ setup_amd() {
141172
# Main function
142173
main() {
143174
log "Starting GPU setup..."
144-
175+
145176
# Install dependencies first
146177
install_dependencies
147178

@@ -165,4 +196,4 @@ main() {
165196
}
166197

167198
# Execute main function
168-
main
199+
main

0 commit comments

Comments
 (0)