Skip to content
Snippets Groups Projects
Commit 3ace4852 authored by Bo-Chun Chen's avatar Bo-Chun Chen
Browse files

Merge branch 'fix-pipeline' into 'main'

Fix knightly pipeline

See merge request rc/packer-openstack-hpc-image!35
parents aee285c8 90fb7d1d
No related branches found
No related tags found
1 merge request: !35 Fix knightly pipeline
Pipeline #7554 failed with stages
in 43 minutes and 30 seconds
Showing
with 3156 additions and 3003 deletions
---
# Play: apply the install_packages role to `default` with privilege escalation.
- name: Install base packages
  hosts: default
  become: true
  roles:
    # The tag mirrors the role name so the role can be selected with `-t`.
    - name: install_packages
      tags: install_packages
This diff is collapsed.
---
# Play: apply the cheaha node roles to `default` with privilege escalation.
# Each role carries a tag equal to its own name.
- name: Setup node for use as a virtual cheaha node
  hosts: default
  become: true
  roles:
    - name: cheaha.node
      tags: cheaha.node
    - name: nfs_mounts
      tags: nfs_mounts
    - name: ldap_config
      tags: ldap_config
    - name: slurm_client
      tags: slurm_client
......@@ -3,6 +3,9 @@
hosts: default
become: true
roles:
- { name: 'cheaha.node', tags: 'cheaha.node' }
- { name: 'install_packages', tags: 'install_packages' }
- { name: 'pam_slurm_adopt', tags: 'pam_slurm_adopt' }
- { name: 'lmod_user', tags: 'lmod_user'}
- name: Setup node for use as a virtual cheaha node
ansible.builtin.import_playbook: cheaha.yml
---
# Import every play from node-compute.yml first, then add the GPU roles below.
- name: Run a play
  ansible.builtin.import_playbook: node-compute.yml

# Play: GPU driver and package roles, each tagged with its own name.
- name: Setup node for use as a cluster host with gpu drivers/pkgs
  hosts: default
  become: true
  roles:
    - name: nvidia_driver
      tags: nvidia_driver
    - name: install_packages
      tags: install_packages
    - name: cuda_driver
      tags: cuda_driver
---
# Package list file; the path is relative to the ansible playbook.
pkg_list_file: 'cheaha-compute-yum-pkg-list.txt'

lmod_db_host_machine: 'cheaha-master01'

# URL of the CUDA 11.8.0 Linux runfile installer.
driver_run_file_link: 'https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run'

# zsh source tarball; the URL is templated from the version.
zsh_ver: '5.7.1'
zsh_src_url: 'https://www.zsh.org/pub/old/zsh-{{ zsh_ver }}.tar.xz'

# Empty defaults.
# NOTE(review): presumably overridden by more specific vars files - confirm.
yum_repo_files: []
pkg_list: []

# Quoted so the version always stays a string.
slurm_version: '18.08.9'
This diff is collapsed.
---
# Repo files and package names for this host group.
# NOTE(review): presumably consumed by the install_packages role (repo files
# copied into place, pkg_list passed to yum) - confirm against the role tasks.
yum_repo_files:
- TurboVNC.repo
- cm.repo
# Packages to install, listed alphabetically.
pkg_list:
- "Lmod"
- "atftp-server"
- "cluster-tools-dell"
- "cluster-tools-slave"
- "cm-boost"
- "cm-config-ceph-release-luminous"
- "cm-config-cm"
- "cm-config-dhclient"
- "cm-config-dracut-slave"
- "cm-config-grub"
- "cm-config-ldap-client"
- "cm-config-limits"
- "cm-config-man"
- "cm-config-named"
- "cm-config-network-slave"
- "cm-config-nfsclient"
- "cm-config-rootfiles-slave"
- "cm-config-selinux"
- "cm-config-ssh-slave"
- "cm-config-sysctl-slave"
- "cm-config-syslog-slave"
- "cm-config-systemd"
- "cm-config-xntp-slave"
- "cm-config-yum"
- "cm-curl"
- "cm-dhcp"
- "cm-freeipmi"
- "cm-ipmitool"
- "cm-ipxe-slave"
- "cm-libpam"
- "cm-libprometheus"
- "cm-lua"
- "cm-mariadb-libs"
- "cm-modules-init-client"
- "cm-openssl"
- "cm-python2"
- "cm-python36"
- "cm-slave"
- "cm-uge-client"
- "cmburn"
- "cmburn-slave"
- "cmdaemon"
- "cmdaemon-remotecm"
- "confuse"
- "env-modules"
- "gcc-recent"
- "gdb-recent"
- "lshw"
- "lua-bit32"
- "lua-filesystem"
- "lua-json"
- "lua-lpeg"
- "lua-posix"
- "lua-term"
- "mysql++"
- "net-snmp-recent"
- "node-installer-slave"
- "openvpn"
- "pbspro-ce-client"
- "pbspro-client"
- "perl-Config-IniFiles"
- "python-azure-sdk"
- "python-dogpile-cache"
- "python-isodate"
- "python-netaddr"
- "python-netifaces"
- "python-oslo-i18n-lang"
- "python-oslo-utils-lang"
- "python-setuptools_scm"
- "python-testtools"
- "python-vcrpy"
- "python-websockify"
- "python2-cffi"
- "python2-cinderclient"
- "python2-cliff"
- "python2-debtcollector"
- "python2-deprecation"
- "python2-fixtures"
- "python2-funcsigs"
- "python2-glanceclient"
- "python2-heatclient"
- "python2-ipaddress"
- "python2-keystoneauth1"
- "python2-keystoneclient"
- "python2-novaclient"
- "python2-openstacksdk"
- "python2-os-client-config"
- "python2-osc-lib"
- "python2-oslo-config"
- "python2-oslo-i18n"
- "python2-oslo-serialization"
- "python2-oslo-utils"
- "python2-pbr"
- "python2-positional"
- "python2-pyOpenSSL"
- "python2-pysocks"
- "python2-pyyaml"
- "python2-requests-oauthlib"
- "python2-requestsexceptions"
- "python2-rfc3986"
- "python2-six"
- "python2-stevedore"
- "python2-swiftclient"
- "sdparm"
- "sge-client"
- "shorewall"
- "sshpass"
- "swig"
- "turbovnc"
---
# No repo files are listed for this package set.
yum_repo_files: []

# CUDA driver and DCGM packages.
pkg_list:
  - cuda-dcgm
  - cuda-dcgm-libs
  - cuda-dcgm-nvvs
  - cuda-driver
---
# Only the cm repo file and the Lmod package are listed for this set.
yum_repo_files:
  - 'cm.repo'

pkg_list:
  - 'Lmod'
---
# Play: apply the compute_packages role to `default` with privilege escalation.
- name: Setup node for use as a cluster host
  hosts: default
  become: true
  roles:
    - name: compute_packages
      tags: compute_packages
---
# Play: install packages on `default` with privilege escalation.
- name: Setup node for use as a virtual ood node
  hosts: default
  become: true
  roles:
    - name: install_packages
      tags: install_packages

# Then run every play defined in cheaha.yml.
- name: Setup node for use as a virtual cheaha node
  ansible.builtin.import_playbook: cheaha.yml
---
# tasks file for cheaha.node
- name: Update /etc/hosts with cluster addressing
ansible.builtin.lineinfile:
path: /etc/hosts
......@@ -8,119 +7,31 @@
- "172.20.0.24 cheaha-master02.cm.cluster cheaha-master02"
- "172.20.0.22 cheaha-master01.cm.cluster cheaha-master01"
- "172.20.0.25 master.cm.cluster master localmaster.cm.cluster localmaster ldapserver.cm.cluster ldapserver"
# Insert the domain-name directive at the very top of dhclient.conf
# (insertbefore: BOF) listing the cluster search domains.
- name: Add proper DNS search to lookup other nodes on the cluster
  ansible.builtin.lineinfile:
    insertbefore: BOF
    line: 'append domain-name " cm.cluster rc.uab.edu ib.cluster drac.cluster eth.cluster ib-hdr.cluster";'
    path: /etc/dhcp/dhclient.conf
# Set the SELinux state to disabled via ansible.posix.selinux.
- name: Disable SELinux
  ansible.posix.selinux:
    state: 'disabled'
- name: Create slurm group
ansible.builtin.group:
name: slurm
state: present
gid: 450
- name: Create slurm user
ansible.builtin.user:
name: slurm
state: present
uid: 450
group: slurm
- name: Update nsswitch.conf to look for ldap
ansible.builtin.replace:
dest: /etc/nsswitch.conf
regexp: '^({{ item }}:(?!.*\bldap\b).*)$'
replace: '\1 ldap'
loop:
- passwd
- shadow
- group
- netgroup
- automount
- name: Create base directories
ansible.builtin.file:
path: "{{ item.dir }}"
state: directory
mode: "{{ item.mode }}"
loop:
- { dir: /local, mode: '0777' }
- { dir: /scratch, mode: '0755' }
- { dir: /share, mode: '0755' }
- { dir: /data/rc/apps, mode: '0755' } # this is only required for the symlink to be happy
- { dir: /data/user, mode: '0755' }
- { dir: /data/project, mode: '0755' }
- name: Set up NFS GPFS mount point(s)
ansible.posix.mount:
path: "{{ item.path }}"
src: "{{ item.src }}"
fstype: "{{ item.fstype }}"
opts: "{{ item.opts }}"
state: present
loop:
- { path: /cm/shared, src: "gpfs.rc.uab.edu:/data/cm/shared-8.2", fstype: nfs, opts: "vers=3,_netdev,defaults" }
- { path: /data/project, src: "gpfs.rc.uab.edu:/data/project", fstype: nfs, opts: "vers=3,_netdev,defaults" }
- { path: /data/user, src: "gpfs.rc.uab.edu:/data/user", fstype: nfs, opts: "vers=3,_netdev,local_lock=posix,defaults" }
- { path: /home, src: "/data/user/home", fstype: none, opts: bind }
- { path: /data/rc/apps, src: "gpfs.rc.uab.edu:/data/rc/apps", fstype: nfs, opts: "vers=3,_netdev,defaults" }
- name: Create symbolic links
ansible.builtin.file:
src: "{{ item.src }}"
dest: "{{ item.dest }}"
- name: Copy CM repo GPG key
ansible.builtin.copy:
src: RPM-GPG-KEY-cm
dest: /etc/pki/rpm-gpg/RPM-GPG-KEY-cm
owner: root
group: root
force: yes
state: link
loop:
- { src: /local, dest: /scratch/local }
- { src: /data/rc/apps, dest: /share/apps }
mode: 0644
# Authorize the key held in `root_ssh_key` for the root account.
# NOTE(review): root_ssh_key looks like it is passed in via --extra-vars
# by the Packer build - confirm.
- name: Add ssh key for root access
  ansible.posix.authorized_key:
    key: "{{ root_ssh_key }}"
    user: root
    state: present
- name: Copy munge key
ansible.builtin.copy:
src: munge.key
dest: /etc/munge/munge.key
owner: daemon
group: root
mode: 0400
- name: Copy ldap cert(s) into place
ansible.builtin.copy:
src: "{{ item.src }}"
dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}"
owner: ldap
group: ldap
mode: 0440
loop:
- { src: ca.pem }
- { src: ldap.key }
- { src: ldap.pem }
- name: Copy ldap config into place
ansible.builtin.copy:
src: nslcd.conf
dest: /etc/nslcd.conf
owner: root
group: root
mode: 0600
- name: Enable services
ansible.builtin.service:
name: "{{ item }}"
enabled: yes
loop:
- munge
- slurmd
- nslcd
# Configure the system timezone via community.general.timezone.
- name: Set timezone to America/Chicago
  community.general.timezone:
    name: 'America/Chicago'
# Statically include the zsh tasks from zsh.yml.
# Uses the fully-qualified action name, matching the other tasks here.
- name: Install zsh
  ansible.builtin.import_tasks: zsh.yml
# Stop firewalld now and keep it from starting at boot.
- name: Disable firewalld
  ansible.builtin.service:
    name: firewalld
    state: stopped
    # Canonical boolean instead of the YAML 1.1-only `no`.
    enabled: false
---
# vars file for cheaha.node
# zsh version to fetch, and the source tarball URL derived from it.
zsh_ver: '5.7.1'
zsh_src_url: 'https://www.zsh.org/pub/old/zsh-{{ zsh_ver }}.tar.xz'
---
# tasks file for cheaha.packages
- name: Install prerequisite packages
yum:
name: epel-release
state: present
- name: Copy yum repo files into place
ansible.builtin.copy:
src: "{{ item }}"
......@@ -12,13 +6,7 @@
owner: root
group: root
mode: 0644
loop:
- cm.repo
- ceph.repo
- dell-system-update.repo
- elastic.repo
- nux-dextop.repo
- TurboVNC.repo
loop: "{{ yum_repo_files }}"
- name: Copy CM repo GPG key
ansible.builtin.copy:
......@@ -27,27 +15,14 @@
owner: root
group: root
mode: 0644
when: "'cm.repo' in yum_repo_files"
- name: Install openstack-rocky repo
- name: Enable epel
ansible.builtin.yum:
name: centos-release-openstack-rocky
state: present
- name: Install required packages
yum:
name:
- slurm-client-18.08.9
- munge-0.5.13
- openldap-servers-2.4.48
- Lmod-7.7.14
- cm-modules-init-client-8.2
- cmdaemon
- nss-pam-ldapd
- ruby
- python3
state: present
name: epel-release
- name: Install system packages
- name: Install packages
ansible.builtin.yum:
name: "{{ lookup('file', '{{ pkg_list_file }}').splitlines() }}"
state: present
name: "{{ pkg_list }}"
---
# Install the LDAP client/server packages.
# The whole list goes to `name` so yum resolves everything in a single
# transaction instead of one module call per loop item.
- name: Install ldap packages
  ansible.builtin.yum:
    name:
      - nss-pam-ldapd
      - openldap
      - openldap-clients
      - openldap-servers
      - sssd-ldap
    state: present
# Append " ldap" to each listed nsswitch.conf database line.
# The negative lookahead skips lines that already mention ldap, so
# repeated runs do not add it twice.
- name: Update nsswitch.conf to look for ldap
  loop:
    - passwd
    - shadow
    - group
    - netgroup
    - automount
  ansible.builtin.replace:
    path: /etc/nsswitch.conf
    regexp: '^({{ item }}:(?!.*\bldap\b).*)$'
    replace: '\1 ldap'
# Copy each cert/key file into the openldap certs directory, owned by
# ldap:ldap and readable only by owner and group.
- name: Copy ldap cert(s) into place
  ansible.builtin.copy:
    src: "{{ item.src }}"
    dest: "/cm/local/apps/openldap/etc/certs/{{ item.src }}"
    owner: ldap
    group: ldap
    # Quoted so the mode stays an octal string, never a YAML integer.
    mode: "0440"
  loop:
    - { src: ca.pem }
    - { src: ldap.key }
    - { src: ldap.pem }
# Install nslcd.conf as root-only readable/writable.
- name: Copy ldap config into place
  ansible.builtin.copy:
    src: nslcd.conf
    dest: /etc/nslcd.conf
    owner: root
    group: root
    # Quoted so the mode stays an octal string, never a YAML integer.
    mode: "0600"
# Enable the listed services at boot; they are not started here
# because no `state` is given.
- name: Enable services
  ansible.builtin.service:
    name: "{{ item }}"
    # Canonical boolean instead of the YAML 1.1-only `yes`.
    enabled: true
  loop:
    - nslcd
---
# Ensure each directory exists with the mode given for it.
- name: Create base directories
  ansible.builtin.file:
    state: directory
    path: "{{ item.dir }}"
    mode: "{{ item.mode }}"
  loop:
    - dir: /local
      mode: '0777'
    - dir: /scratch
      mode: '0755'
    - dir: /share
      mode: '0755'
    # this is only required for the symlink to be happy
    - dir: /data/rc/apps
      mode: '0755'
    - dir: /data/user
      mode: '0755'
    - dir: /data/project
      mode: '0755'
# Declare the NFS mounts and the /home bind mount with state `present`.
- name: Set up NFS GPFS mount point(s)
  ansible.posix.mount:
    state: present
    path: "{{ item.path }}"
    src: "{{ item.src }}"
    fstype: "{{ item.fstype }}"
    opts: "{{ item.opts }}"
  loop:
    - path: /cm/shared
      src: "gpfs.rc.uab.edu:/data/cm/shared-8.2"
      fstype: nfs
      opts: "vers=3,_netdev,defaults"
    - path: /data/project
      src: "gpfs.rc.uab.edu:/data/project"
      fstype: nfs
      opts: "vers=3,_netdev,defaults"
    - path: /data/user
      src: "gpfs.rc.uab.edu:/data/user"
      fstype: nfs
      opts: "vers=3,_netdev,local_lock=posix,defaults"
    # /home is a bind mount of the home tree under /data/user.
    - path: /home
      src: "/data/user/home"
      fstype: none
      opts: bind
    - path: /data/rc/apps
      src: "gpfs.rc.uab.edu:/data/rc/apps"
      fstype: nfs
      opts: "vers=3,_netdev,defaults"
# Create root-owned symlinks: dest is the link path, src is its target.
- name: Create symbolic links
  ansible.builtin.file:
    src: "{{ item.src }}"
    dest: "{{ item.dest }}"
    owner: root
    group: root
    # Canonical boolean instead of the YAML 1.1-only `yes`.
    force: true
    state: link
  loop:
    - src: /local
      dest: /scratch/local
    - src: /data/rc/apps
      dest: /share/apps
---
# Install munge and the slurm client pinned to `slurm_version`.
# The list goes to `name` so yum handles both packages in one
# transaction instead of one module call per loop item.
- name: Install slurm packages
  ansible.builtin.yum:
    name:
      - munge
      - "slurm-client-{{ slurm_version }}"
    state: present
# Ensure the slurm group exists with fixed gid 450.
- name: Create slurm group
  ansible.builtin.group:
    gid: 450
    name: slurm
    state: present
# Ensure the slurm user exists with fixed uid 450 and primary group slurm.
- name: Create slurm user
  ansible.builtin.user:
    uid: 450
    name: slurm
    group: slurm
    state: present
# Install the munge key, readable by its owner only.
- name: Copy munge key
  ansible.builtin.copy:
    src: munge.key
    dest: /etc/munge/munge.key
    owner: daemon
    group: root
    # Quoted so the mode stays an octal string, never a YAML integer.
    mode: "0400"
# Enable munge and slurmd at boot; they are not started here
# because no `state` is given.
- name: Enable services
  ansible.builtin.service:
    name: "{{ item }}"
    # Canonical boolean instead of the YAML 1.1-only `yes`.
    enabled: true
  loop:
    - munge
    - slurmd
......@@ -23,17 +23,20 @@ build {
sources = ["source.openstack.image"]
provisioner "ansible" {
playbook_file = "./ansible/node-compute.yml"
groups = ["compute"]
playbook_file = "./ansible/compute.yml"
roles_path = "./ansible/roles"
extra_arguments = [
"--extra-vars", "root_ssh_key='${var.root_ssh_key}'"
]
}
provisioner "ansible" {
playbook_file = "../CRI_XCBC/ood-packer.yaml"
use_proxy = false
ansible_env_vars = ["ANSIBLE_HOST_KEY_CHECKING=False"]
playbook_file = "../CRI_XCBC/ood-packer.yaml"
extra_arguments = [
"-t enable_lmod",
"-t lmod_user",
]
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment