Weak scaling performance, ReaxFF HNS benchmark, KNL, 128K atoms/node
Performance in millions of atom-timesteps / second / node

Nodes CPU/KNL (mpi,hyper) OMP/KNL (mpi,thread,hyper) Kokkos/KNL (mpi,thread,hyper) Kokkos/serial/KNL (mpi,hyper)
1 0.1985 (128,2) 0.2608 (32,8,4) 0.2003 (64,4,4) 0.2341 (128,2)
2 0.1953 (128,2) 0.2572 (32,8,4) 0.1986 (128,2,4) 0.2294 (128,2)
4 0.178 (128,2) 0.2576 (32,8,4) 0.1955 (128,2,4) 0.224 (128,2)
8 0.1912 (128,2) 0.2571 (32,8,4) 0.1944 (128,2,4) 0.2224 (128,2)
16 0.1909 (128,2) 0.2522 (32,8,4) 0.1926 (128,2,4) 0.2178 (128,2)
32 0.1649 (128,2) 0.2516 (32,8,4) 0.1903 (128,2,4) 0.2151 (128,2)
64 0.1569 (128,2) 0.2504 (32,8,4) 0.1893 (128,2,4) 0.2118 (128,2)

Run commands and logfile links for column CPU/KNL

1 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 8 -v y 8 -v z 6 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=1.mpi=128.hyper=2
2 srun -n 256 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 8 -v y 8 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=2.mpi=128.hyper=2
4 srun -n 512 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 8 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=4.mpi=128.hyper=2
8 srun -n 1024 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 16 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=8.mpi=128.hyper=2
16 srun -n 2048 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 16 -v y 16 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=16.mpi=128.hyper=2
32 srun -n 4096 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 16 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=32.mpi=128.hyper=2
64 srun -n 8192 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_knl -v x 32 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=cpu_knl.kind=weak.size=128K.node=64.mpi=128.hyper=2

Run commands and logfile links for column OMP/KNL

1 setenv OMP_NUM_THREADS 8; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 8 -v y 8 -v z 6 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=1.mpi=32.thread=8.hyper=4
2 setenv OMP_NUM_THREADS 8; srun -n 64 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 8 -v y 8 -v z 12 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=2.mpi=32.thread=8.hyper=4
4 setenv OMP_NUM_THREADS 8; srun -n 128 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 8 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=4.mpi=32.thread=8.hyper=4
8 setenv OMP_NUM_THREADS 8; srun -n 256 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 16 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=8.mpi=32.thread=8.hyper=4
16 setenv OMP_NUM_THREADS 8; srun -n 512 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 16 -v y 16 -v z 24 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=16.mpi=32.thread=8.hyper=4
32 setenv OMP_NUM_THREADS 8; srun -n 1024 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 16 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=32.mpi=32.thread=8.hyper=4
64 setenv OMP_NUM_THREADS 8; srun -n 2048 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./lmp_mutrino_knl -sf omp -pk omp 8 -v x 32 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.omp.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=omp_knl.kind=weak.size=128K.node=64.mpi=32.thread=8.hyper=4

Run commands and logfile links for column Kokkos/KNL

1 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./lmp_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 8 -v y 8 -v z 6 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=1.mpi=64.thread=4.hyper=4
2 setenv OMP_NUM_THREADS 2; srun -n 256 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 8 -v y 8 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=2.mpi=128.thread=2.hyper=4
4 setenv OMP_NUM_THREADS 2; srun -n 512 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 8 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=4.mpi=128.thread=2.hyper=4
8 setenv OMP_NUM_THREADS 2; srun -n 1024 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 16 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=8.mpi=128.thread=2.hyper=4
16 setenv OMP_NUM_THREADS 2; srun -n 2048 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 16 -v y 16 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=16.mpi=128.thread=2.hyper=4
32 setenv OMP_NUM_THREADS 2; srun -n 4096 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 16 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=32.mpi=128.thread=2.hyper=4
64 setenv OMP_NUM_THREADS 2; srun -n 8192 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos neigh half neigh/qeq full newton on comm no -v x 32 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_knl.kind=weak.size=128K.node=64.mpi=128.thread=2.hyper=4

Run commands and logfile links for column Kokkos/serial/KNL

1 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 8 -v y 8 -v z 6 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=1.mpi=128.hyper=2
2 srun -n 256 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 8 -v y 8 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=2.mpi=128.hyper=2
4 srun -n 512 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 8 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=4.mpi=128.hyper=2
8 srun -n 1024 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 16 -v y 16 -v z 12 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=8.mpi=128.hyper=2
16 srun -n 2048 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 16 -v y 16 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=16.mpi=128.hyper=2
32 srun -n 4096 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 16 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=32.mpi=128.hyper=2
64 srun -n 8192 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./lmp_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos neigh half neigh/qeq half newton on comm no -v x 32 -v y 32 -v z 24 -v t 100 -in in.reaxc.hns.steps -nocite -log log.lammps.date=17Jan18.model=hns.machine=mutrino.pkg=kokkos_serial_knl.kind=weak.size=128K.node=64.mpi=128.hyper=2