#!/bin/bash
#
# GPU host bootstrap script: installs prerequisite packages and Docker,
# detects an NVIDIA or AMD GPU with lspci, installs the matching driver and
# compute stack, and finally disables gpu-setup.service.
# Must be run as root.

# Color codes for output.
# Stored as literal backslash sequences; they only become real escape
# characters when printed with `echo -e` or printf's %b.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # "no color": resets the terminal attributes
# Logging functions.
# Each takes the message as $1 and prefixes it with a colored tag. The color
# variables are expanded with %b so their backslash escapes are interpreted;
# the message itself is printed verbatim with %s.
#   log   - progress message, written to stdout
#   warn  - non-fatal problem, written to stderr
#   error - fatal problem, written to stderr; exits the script with status 1
log() {
  printf '%b[SETUP]%b %s\n' "${GREEN}" "${NC}" "$1"
}

warn() {
  printf '%b[WARNING]%b %s\n' "${YELLOW}" "${NC}" "$1" >&2
}

error() {
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1" >&2
  exit 1
}
# Check if running as root.
# The later steps call apt, systemctl and usermod and write files under /etc,
# so stop immediately when the effective UID is not 0.
if [[ "${EUID}" -ne 0 ]]; then
  error "This script must be run as root"
fi
# Install prerequisites and dependencies.
#
# Refreshes the apt package lists, installs every prerequisite package that
# dpkg does not already report as installed, and installs Docker through the
# get.docker.com convenience script when no `docker` command is available.
#
# Arguments: none
# Returns:   0 normally; 1 when Docker had to be installed and that failed.
#            Exits through error() when `apt update` fails. A single package
#            that fails to install only produces a warning.
install_dependencies() {
  log "Installing prerequisites and dependencies..."

  # Update package lists
  apt update || error "Failed to update package lists"

  # Kernel headers must match the running kernel, so resolve it once up front.
  local kernel_release
  kernel_release=$(uname -r)

  # Essential packages needed for GPU detection and setup
  local -a packages=(
    "wget"
    "curl"
    "pciutils"
    "build-essential"
    "software-properties-common"
    "linux-headers-${kernel_release}"
  )

  # Install packages
  local package status
  for package in "${packages[@]}"; do
    # Query dpkg for this exact package name. Grepping `dpkg -l` output would
    # also match other packages that merely contain the name (e.g. libcurl4
    # for curl) and wrongly skip the install.
    status=$(dpkg-query -W -f='${Status}' "${package}" 2>/dev/null)
    if [[ "${status}" == *"install ok installed"* ]]; then
      log "${package} is already installed"
    else
      log "Installing ${package}..."
      apt install -y "${package}" || warn "Failed to install ${package}"
    fi
  done

  # Install Docker if not present
  if ! command -v docker &>/dev/null; then
    log "Installing Docker..."

    # Download into a private temp file instead of a fixed name in the
    # current directory, so a stale or planted get-docker.sh is never run.
    local installer
    if ! installer=$(mktemp); then
      warn "Failed to create a temporary file for the Docker installer"
      return 1
    fi

    local docker_status=0
    if curl -fsSL https://get.docker.com -o "${installer}" \
      && sh "${installer}"; then
      systemctl enable --now docker \
        || warn "Failed to enable the Docker service"
    else
      warn "Failed to install Docker"
      docker_status=1
    fi

    rm -f -- "${installer}"
    return "${docker_status}"
  fi

  return 0
}
# Detect GPU type.
#
# Inspects `lspci` output and prints exactly one of: nvidia, amd, unknown.
# NVIDIA is checked first, so a host with both vendors reports nvidia.
#
# Only display/accelerator-class PCI devices are considered. Matching the
# vendor name against every PCI device would classify a machine as "amd"
# just because it has an AMD CPU host bridge or USB controller.
#
# Arguments: none
# Outputs:   the detected type on stdout
detect_gpu_type() {
  local gpu_devices
  gpu_devices=$(lspci \
    | grep -Ei 'VGA compatible controller|3D controller|Display controller|Processing accelerators')

  if grep -qi 'nvidia' <<<"${gpu_devices}"; then
    echo "nvidia"
  elif grep -Eqi 'AMD|Radeon|Advanced Micro Devices' <<<"${gpu_devices}"; then
    echo "amd"
  else
    echo "unknown"
  fi
}
# Setup NVIDIA.
#
# Installs the NVIDIA driver (when nvidia-smi is missing), registers the
# NVIDIA Container Toolkit apt repository, installs the toolkit, configures
# it as a Docker runtime, restarts Docker, and installs the CUDA toolkit
# (when nvcc is missing).
#
# Arguments: none
# Returns:   0 on success. Exits through error() when a step the rest of the
#            setup depends on fails; a failed CUDA install is only a warning.
setup_nvidia() {
  local -r repo_base="https://nvidia.github.io/libnvidia-container"
  local -r keyring="/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg"
  local -r repo_list="/etc/apt/sources.list.d/nvidia-container-toolkit.list"

  log "Setting up NVIDIA GPU environment..."

  # Install NVIDIA drivers if not present
  if ! command -v nvidia-smi &>/dev/null; then
    log "Installing NVIDIA drivers..."
    apt install -y ubuntu-drivers-common \
      || error "Failed to install ubuntu-drivers-common"
    ubuntu-drivers autoinstall || error "Failed to install NVIDIA drivers"
  fi

  # Setup NVIDIA Container Toolkit
  log "Setting up NVIDIA Container Toolkit..."

  # Fetch first, then convert: a failed download is detected on its own
  # instead of being hidden behind the exit status of the last pipeline stage.
  local signing_key
  signing_key=$(curl -fsSL "${repo_base}/gpgkey") \
    || error "Failed to download the NVIDIA Container Toolkit signing key"
  # --batch --yes lets a re-run overwrite an existing keyring without gpg
  # stopping to ask for confirmation.
  gpg --batch --yes --dearmor -o "${keyring}" <<<"${signing_key}" \
    || error "Failed to install the NVIDIA Container Toolkit signing key"

  # -f makes curl fail on an HTTP error rather than handing an error page to
  # sed/tee and writing it into the apt sources directory.
  local repo_entries
  repo_entries=$(curl -fsSL "${repo_base}/stable/deb/nvidia-container-toolkit.list") \
    || error "Failed to download the NVIDIA Container Toolkit repository list"
  # Pin every repository entry to the keyring installed above.
  sed "s#deb https://#deb [signed-by=${keyring}] https://#g" <<<"${repo_entries}" \
    | tee "${repo_list}"

  apt update || error "Failed to update package lists"
  apt install -y nvidia-container-toolkit \
    || error "Failed to install nvidia-container-toolkit"
  nvidia-ctk runtime configure --runtime=docker \
    || error "Failed to configure the NVIDIA runtime for Docker"

  # Restart Docker
  systemctl restart docker || error "Failed to restart Docker"
  # NOTE(review): presumably gives the Docker daemon time to come back up
  # before anything else talks to it - confirm this delay is still needed.
  sleep 5

  # Install CUDA if not present
  if ! command -v nvcc &>/dev/null; then
    log "Installing CUDA toolkit..."
    apt install -y nvidia-cuda-toolkit || warn "Failed to install CUDA toolkit"
  fi
}
# Setup AMD.
#
# Installs kernel headers and clinfo, registers the ROCm 5.7 apt repository
# (only when its list file does not exist yet), installs the ROCm packages,
# adds the invoking sudo user to the GPU access groups, and writes
# /etc/profile.d/rocm.sh with the ROCm PATH / LD_LIBRARY_PATH additions.
#
# Arguments: none
# Globals:   SUDO_USER (read; group membership is skipped when it is empty)
# Returns:   0 on success. Exits through error() when the repository or the
#            ROCm packages cannot be installed.
setup_amd() {
  local -r rocm_list="/etc/apt/sources.list.d/rocm.list"
  local -r rocm_keyring="/etc/apt/keyrings/rocm.gpg"
  local -r rocm_profile="/etc/profile.d/rocm.sh"

  log "Setting up AMD GPU environment..."

  # Install AMD GPU drivers and tools
  apt install -y linux-headers-generic clinfo \
    || warn "Failed to install AMD prerequisite packages"

  # Add ROCm repository if needed
  if [[ ! -f "${rocm_list}" ]]; then
    log "Adding ROCm repository..."

    # apt-key is deprecated; store the key in a dedicated keyring and bind the
    # repository entry to it with signed-by instead.
    if ! command -v gpg &>/dev/null; then
      apt install -y gnupg || error "Failed to install gnupg"
    fi

    local rocm_key
    rocm_key=$(wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key) \
      || error "Failed to download the ROCm signing key"
    mkdir -p -m 0755 /etc/apt/keyrings \
      || error "Failed to create /etc/apt/keyrings"
    gpg --batch --yes --dearmor -o "${rocm_keyring}" <<<"${rocm_key}" \
      || error "Failed to install the ROCm signing key"

    echo "deb [arch=amd64 signed-by=${rocm_keyring}] https://repo.radeon.com/rocm/apt/5.7 ubuntu main" \
      | tee "${rocm_list}"
    apt update || error "Failed to update package lists"
  fi

  # Install ROCm packages
  apt install -y \
    rocm-hip-sdk \
    rocm-hip-runtime \
    rocm-opencl-runtime \
    rocm-hip-libraries \
    rocm-dev \
    rocm-utils \
    hip-runtime-amd \
    || error "Failed to install ROCm packages"

  # Add user to the GPU access groups if SUDO_USER is available.
  # ROCm's install guide asks for both video and render; render is skipped on
  # systems where that group does not exist.
  if [[ -n "${SUDO_USER:-}" ]]; then
    local group
    for group in video render; do
      getent group "${group}" >/dev/null || continue
      if usermod -a -G "${group}" "${SUDO_USER}"; then
        log "Added user ${SUDO_USER} to ${group} group"
      else
        warn "Failed to add user ${SUDO_USER} to ${group} group"
      fi
    done
  fi

  # Set up environment variables.
  # The lines are single-quoted on purpose: $PATH and $LD_LIBRARY_PATH must be
  # written literally so they expand when the profile script is sourced.
  if [[ ! -f "${rocm_profile}" ]]; then
    printf '%s\n' \
      'export PATH=$PATH:/opt/rocm/bin:/opt/rocm/rocprofiler/bin:/opt/rocm/opencl/bin' \
      'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm/lib64' \
      >"${rocm_profile}" \
      || warn "Failed to write ${rocm_profile}"
    chmod 644 "${rocm_profile}"
  fi
}
# Main function.
#
# Installs the common dependencies, detects the GPU vendor, runs the matching
# vendor setup, and then disables gpu-setup.service.
#
# Arguments: none
# Returns:   0 on success. Exits through error() when neither an NVIDIA nor
#            an AMD GPU is detected.
main() {
  log "Starting GPU setup..."

  # Install dependencies first (this provides lspci for the detection below).
  install_dependencies

  # Setup based on GPU type
  local gpu_type
  gpu_type=$(detect_gpu_type)
  case "${gpu_type}" in
    nvidia)
      setup_nvidia
      ;;
    amd)
      setup_amd
      ;;
    *)
      error "No supported GPU detected (NVIDIA or AMD required)"
      ;;
  esac

  # NOTE(review): presumably this script is started by gpu-setup.service and
  # disables it so the setup does not run again on the next boot - confirm.
  systemctl disable gpu-setup.service \
    || warn "Failed to disable gpu-setup.service"

  log "GPU setup completed successfully!"
}
# Execute main function, forwarding any command-line arguments.
main "$@"