20
20
# Install prerequisites and dependencies
21
21
install_dependencies () {
22
22
log " Installing prerequisites and dependencies..."
23
-
23
+
24
24
# Update package lists
25
25
apt update || error " Failed to update package lists"
26
26
@@ -67,12 +67,20 @@ detect_gpu_type() {
67
67
# Setup NVIDIA
68
68
setup_nvidia () {
69
69
log " Setting up NVIDIA GPU environment..."
70
-
70
+
71
71
# Install NVIDIA drivers if not present
72
72
if ! command -v nvidia-smi &> /dev/null; then
73
- log " Installing NVIDIA drivers..."
73
+ log " Installing NVIDIA drivers using ubuntu-drivers autoinstall ..."
74
74
apt install -y ubuntu-drivers-common
75
75
ubuntu-drivers autoinstall
76
+
77
+ # Check if nvidia-smi is available after autoinstall
78
+ if ! command -v nvidia-smi &> /dev/null; then
79
+ log " Autoinstall didn't provide nvidia-smi, installing specific driver 535..."
80
+ add-apt-repository -y ppa:graphics-drivers/ppa
81
+ apt update
82
+ apt install -y nvidia-driver-535 nvidia-utils-535
83
+ fi
76
84
fi
77
85
78
86
# Setup NVIDIA Container Toolkit
@@ -86,10 +94,33 @@ setup_nvidia() {
86
94
apt install -y nvidia-container-toolkit
87
95
nvidia-ctk runtime configure --runtime=docker
88
96
97
+ # Configure Docker for NVIDIA runtime
98
+ log " Configuring Docker for NVIDIA runtime..."
99
+ mkdir -p /etc/docker
100
+ cat > /etc/docker/daemon.json << EOF
101
+ {
102
+ "default-runtime": "nvidia",
103
+ "runtimes": {
104
+ "nvidia": {
105
+ "path": "nvidia-container-runtime",
106
+ "runtimeArgs": []
107
+ }
108
+ }
109
+ }
110
+ EOF
111
+
89
112
# Restart Docker
90
113
systemctl restart docker
91
114
sleep 5
92
115
116
+ # Final check for nvidia-smi
117
+ if ! command -v nvidia-smi &> /dev/null; then
118
+ warn " nvidia-smi still not available after driver installation!"
119
+ warn " This may require a system reboot to fully activate drivers"
120
+ else
121
+ log " NVIDIA drivers successfully installed and nvidia-smi is available"
122
+ fi
123
+
93
124
# Install CUDA if not present
94
125
if ! command -v nvcc &> /dev/null; then
95
126
log " Installing CUDA toolkit..."
@@ -100,7 +131,7 @@ setup_nvidia() {
100
131
# Setup AMD
101
132
setup_amd () {
102
133
log " Setting up AMD GPU environment..."
103
-
134
+
104
135
# Install AMD GPU drivers and tools
105
136
apt install -y \
106
137
linux-headers-generic \
@@ -141,7 +172,7 @@ setup_amd() {
141
172
# Main function
142
173
main () {
143
174
log " Starting GPU setup..."
144
-
175
+
145
176
# Install dependencies first
146
177
install_dependencies
147
178
@@ -165,4 +196,4 @@ main() {
165
196
}
166
197
167
198
# Execute main function
168
- main
199
+ main
0 commit comments