使用 MPIJob 运行 8 卡 3090 的 NCCL 测试

提交 MPIJob 运行 NCCL 测试

kubectl apply -f - <<'EOF'
# The heredoc delimiter is quoted ('EOF') so the shell passes the manifest to
# kubectl verbatim, without expanding "$" or backslashes inside it.
---
# MPIJob that runs the nccl-tests all_reduce_perf benchmark with 8 MPI ranks
# (one GPU per rank, "-g 1") on a single worker pod that requests 8 GPUs.
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: nccl-tests-3090
spec:
  # Must match the number of GPUs requested per worker and, with one worker,
  # the "-np" value passed to mpirun below.
  slotsPerWorker: 8
  runPolicy:
    cleanPodPolicy: Running
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          containers:
            - image: cr.zw1.paratera.com/tests/nccl-tests:12.2.2-cudnn8-devel-ubuntu20.04-nccl2.19.3-1-3e0fbc3
              name: nccl
              env:
                # Open MPI refuses to run as root unless both variables are set.
                - name: OMPI_ALLOW_RUN_AS_ROOT
                  value: "1"
                - name: OMPI_ALLOW_RUN_AS_ROOT_CONFIRM
                  value: "1"
              # Uncomment to be able to exec in to launcher pod for interactive testing
              # command: ['sleep', '86400']
              command: ["/bin/bash", "-c"]
              # The folded scalar (>-) joins the lines below with single spaces
              # into one command line, so no "\" continuations are needed.
              # Keep every line at the same indentation or the folding breaks.
              # NOTE(review): NCCL_IB_HCA normally names an RDMA device
              # (e.g. mlx5_0), not a network interface such as eth0 - confirm
              # whether this setting is intended on these nodes.
              args:
                - >-
                  mpirun
                  -np 8
                  -bind-to none
                  -x LD_LIBRARY_PATH
                  -x NCCL_SOCKET_IFNAME=eth0
                  -x NCCL_IB_HCA=eth0
                  /opt/nccl_tests/build/all_reduce_perf -b 512M -e 8G -f 2 -g 1
              resources:
                limits:
                  cpu: 2
                  memory: 4Gi
          enableServiceLinks: false
          automountServiceAccountToken: false
    Worker:
      replicas: 1
      template:
        metadata:
          labels:
            job: nccl-tests
        spec:
          containers:
            - image: cr.zw1.paratera.com/tests/nccl-tests:12.2.2-cudnn8-devel-ubuntu20.04-nccl2.19.3-1-3e0fbc3
              name: nccl
              resources:
                limits:
                  cpu: 80
                  memory: 480Gi
                  nvidia.com/gpu: 8
              volumeMounts:
                # Memory-backed /dev/shm for the worker container.
                - mountPath: /dev/shm
                  name: dshm
          volumes:
            - emptyDir:
                medium: Memory
              name: dshm
          tolerations:
            - key: nvidia.com/gpu
              operator: Exists
          # Schedule only onto nodes whose nvidia.com/gpu label is rtx-3090.
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                  - matchExpressions:
                      - key: nvidia.com/gpu
                        operator: In
                        values:
                          - rtx-3090
          enableServiceLinks: false
          automountServiceAccountToken: false
EOF

查看 MPIJob 状态

kubectl get mpijob 
kubectl describe mpijob nccl-tests-3090

查看 NCCL 测试日志

kubectl logs nccl-tests-3090-launcher-xxxxx

测试结果

#
# Reducing maxBytes to 8119014741 due to memory limitation
#
#                                                              out-of-place                       in-place
#       size         count      type   redop    root     time   algbw   busbw #wrong     time   algbw   busbw #wrong
#        (B)    (elements)                               (us)  (GB/s)  (GB/s)            (us)  (GB/s)  (GB/s)
   536870912     134217728     float     sum      -1    58676    9.15   16.01      0    58509    9.18   16.06      0
  1073741824     268435456     float     sum      -1   117613    9.13   15.98      0   117659    9.13   15.97      0
  2147483648     536870912     float     sum      -1   236546    9.08   15.89      0   236985    9.06   15.86      0
  4294967296    1073741824     float     sum      -1   486906    8.82   15.44      0   487377    8.81   15.42      0
# Out of bounds values : 0 OK
# Avg bus bandwidth    : 15.8275
#

相关文档:验证 MPIJob;使用 MPIJob 运行 16 卡 H100 的 NCCL 测试