系统环境准备
本次搭建的集群系统包括2台服务器 headnode 和 node1
###headnode
[root@headnode ~]# cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
###node1
[root@node1 ~]# cat /etc/redhat-release
CentOS Linux release 7.6.1810 (Core)
[root@headnode ~]# lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 128
On-line CPU(s) list: 0-127
Thread(s) per core: 1
Core(s) per socket: 64
Socket(s): 2
NUMA node(s): 8
Vendor ID: AuthenticAMD
CPU family: 23
Model: 49
Model name: AMD EPYC 7702 64-Core Processor
安装前准备
安装系统必要的库函数
[root]# yum install libtool openssl-devel libxml2-devel boost-devel gcc gcc-c++
准备安装包: torque-6.1.2.zip
torque-6.1.2安装配置
服务器安装: headnode作为server节点
unzip torque-6.1.2.zip
cd torque-6.1.2
./autogen.sh
./configure --prefix=/opt/pbs_612 --enable-cgroups --with-scp --with-default-server=headnode
make -j8 ##编译
make install ##安装
make packages ##制作安装包
torque-package-clients-linux-x86_64.sh
torque-package-devel-linux-x86_64.sh
torque-package-doc-linux-x86_64.sh
torque-package-mom-linux-x86_64.sh
torque-package-server-linux-x86_64.sh
cp contrib/systemd/pbs_sched.service /usr/lib/systemd/system
source /etc/profile.d/torque.sh ##加载环境变量
./torque.setup root ##初始化数据库
##设置服务开机自启
systemctl enable pbs_server
systemctl enable pbs_mom
systemctl enable pbs_sched
systemctl enable trqauthd
vim /var/spool/torque/server_priv/nodes ###添加计算节点
node1 np=128
node2 np=128
###指定服务端节点
vim /var/spool/torque/mom_priv/config
$pbsserver headnode
$logevent 255
###启动服务
systemctl restart pbs_server
systemctl restart pbs_mom
systemctl restart pbs_sched
systemctl restart trqauthd
qnodes ###查看节点状态,也可以使用 pbsnodes -a
安装maui
torque默认使用自带的pbs_sched进行作业调度,但其调度策略过于简单,因此建议在服务节点上安装maui来进行作业调度。注意:计算节点上无须安装maui。
tar -xvf maui-3.3.1.tar.gz
cd maui-3.3.1
./configure --prefix=/opt/pbs_612/maui_331 --with-pbs=/opt/pbs_612
make -j8&&make install
cp maui.cfg.dist /opt/pbs_612/maui_331/maui.cfg
vim /opt/pbs_612/maui_331/maui.cfg
SERVERHOST headnode
# primary admin must be first in list
ADMIN1 root
# Resource Manager Definition
RMCFG[HEADNODE] TYPE=PBS
cp contrib/service-scripts/redhat.maui.d /etc/init.d/maui
chmod +x /etc/init.d/maui
vim /etc/init.d/maui
MAUI_PREFIX=/opt/pbs_612/maui_331
daemon --user root $MAUI_PREFIX/sbin/maui ###将用户名改为root
/etc/init.d/maui start ###启动maui
systemctl disable pbs_sched
systemctl stop maui ###如需停止maui服务时执行(正常调度时maui需保持运行)
客户端节点安装
scp torque-package-clients-linux-x86_64.sh torque-package-mom-linux-x86_64.sh node1:~
###登录到node1节点进行安装
./torque-package-clients-linux-x86_64.sh --install
./torque-package-mom-linux-x86_64.sh --install
###配置服务端节点
vim /var/spool/torque/mom_priv/config
$pbsserver headnode
$logevent 255
###启动服务
systemctl enable pbs_mom
systemctl enable trqauthd
systemctl restart pbs_mom
systemctl restart trqauthd
测试作业
###创建共享目录和普通用户test,配置节点间SSH免密登录,然后以test用户提交作业(torque默认不允许root提交作业)
echo sleep 30 | qsub