Currently Loaded Modulefiles: 1) modules/3.2.11.4 2) cge/3.2.1463_r03f4dfb_fe3.3.0_2019062614 3) Base-opts/2.4.142-7.0.2.1_2.21__g8f27585.ari 4) gcc/8.3.0 5) craype-haswell 6) craype-network-aries 7) craype/2.7.0 8) cray-mpich/7.7.15 9) slurm/20.02.6-1 10) xalt/2.8.10 11) cray-libsci/20.06.1 12) pmi/5.0.16 13) atp/3.7.4 14) rca/2.2.20-7.0.2.1_2.27__g8e3fb5b.ari 15) perftools-base/20.08.0 16) PrgEnv-gnu/6.0.8 17) daint-gpu 18) cudatoolkit/10.2.89_3.28-7.0.2.1_2.17__g52c0314 19) CMake/3.18.4 Previous HEAD position was 05cab50ec Merge branch 'master' of https://github.com/hfp/libxsmm HEAD is now at 02d6ab213 Introduced LIBXSMM_MKL_VERSION3 (calculation of INTEL_MKL_VERSION is not stable over time). This fixes once more some NOTHROW issue this time caused by inconsistent INTEL_MKL_VERSION. ================================================================================ LIBXSMM master-1.16.1-1085 (Linux@nid00563) -------------------------------------------------------------------------------- GNU Compiler Collection: gcc 8.3.0, g++ 8.3.0, and gfortran 8.3.0 C / C++ target: -msse4.2 Fortran target: -msse4.2 -------------------------------------------------------------------------------- --- LIBXSMM build log gcc -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -D__BLAS=0 -Werror -c ./src/libxsmm_ext.c -o obj/intel64/libxsmm_noblas.o gfortran -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -fPIC -ffree-form -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -msse4.2 -c include/libxsmm.f -o obj/intel64/libxsmm-mod.o -J include /usr/bin/gcc-ar -rs lib/libxsmmnoblas.a obj/intel64/libxsmm_noblas.o /usr/lib64/gcc/x86_64-suse-linux/7/../../../../x86_64-suse-linux/bin/ar: creating lib/libxsmmnoblas.a gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -Iobj -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_main.c -o obj/intel64/libxsmm_main.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_malloc.c -o obj/intel64/libxsmm_malloc.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_memory.c -o obj/intel64/libxsmm_memory.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_hash.c -o obj/intel64/libxsmm_hash.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_math.c -o obj/intel64/libxsmm_math.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_sync.c -o obj/intel64/libxsmm_sync.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_python.c -o obj/intel64/libxsmm_python.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_mhd.c -o obj/intel64/libxsmm_mhd.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_timer.c -o obj/intel64/libxsmm_timer.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_perf.c -o obj/intel64/libxsmm_perf.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_gemm.c -o obj/intel64/libxsmm_gemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_xcopy.c -o obj/intel64/libxsmm_xcopy.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_blocked_gemm.c -o obj/intel64/libxsmm_blocked_gemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_spmdm.c -o obj/intel64/libxsmm_spmdm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_fsspmdm.c -o obj/intel64/libxsmm_fsspmdm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_rng.c -o obj/intel64/libxsmm_rng.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn.c -o obj/intel64/libxsmm_dnn.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_tensor.c -o obj/intel64/libxsmm_dnn_tensor.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_convolution.c -o obj/intel64/libxsmm_dnn_convolution.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_elementwise.c -o obj/intel64/libxsmm_dnn_elementwise.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_rnncell.c -o obj/intel64/libxsmm_dnn_rnncell.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_rnncell_forward.c -o obj/intel64/libxsmm_dnn_rnncell_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_rnncell_backward_weight_update.c -o obj/intel64/libxsmm_dnn_rnncell_backward_weight_update.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedbatchnorm.c -o obj/intel64/libxsmm_dnn_fusedbatchnorm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedbatchnorm_forward.c -o obj/intel64/libxsmm_dnn_fusedbatchnorm_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedbatchnorm_backward.c -o obj/intel64/libxsmm_dnn_fusedbatchnorm_backward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedgroupnorm.c -o obj/intel64/libxsmm_dnn_fusedgroupnorm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedgroupnorm_forward.c -o obj/intel64/libxsmm_dnn_fusedgroupnorm_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fusedgroupnorm_backward.c -o obj/intel64/libxsmm_dnn_fusedgroupnorm_backward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_pooling.c -o obj/intel64/libxsmm_dnn_pooling.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_pooling_forward.c -o obj/intel64/libxsmm_dnn_pooling_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_pooling_backward.c -o obj/intel64/libxsmm_dnn_pooling_backward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_convolution_forward.c -o obj/intel64/libxsmm_dnn_convolution_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fullyconnected.c -o obj/intel64/libxsmm_dnn_fullyconnected.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fullyconnected_forward.c -o obj/intel64/libxsmm_dnn_fullyconnected_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_fullyconnected_backward_weight_update.c -o obj/intel64/libxsmm_dnn_fullyconnected_backward_weight_update.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_convolution_backward.c -o obj/intel64/libxsmm_dnn_convolution_backward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_convolution_weight_update.c -o obj/intel64/libxsmm_dnn_convolution_weight_update.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_softmaxloss.c -o obj/intel64/libxsmm_dnn_softmaxloss.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_softmaxloss_forward.c -o obj/intel64/libxsmm_dnn_softmaxloss_forward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_softmaxloss_backward.c -o obj/intel64/libxsmm_dnn_softmaxloss_backward.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_optimizer.c -o obj/intel64/libxsmm_dnn_optimizer.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_dnn_optimizer_sgd.c -o obj/intel64/libxsmm_dnn_optimizer_sgd.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_x86_instructions.c -o obj/intel64/generator_x86_instructions.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_transpose_avx_avx512.c -o obj/intel64/generator_transpose_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_transpose.c -o obj/intel64/generator_transpose.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csr_reader.c -o obj/intel64/generator_spgemm_csr_reader.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csr_asparse_reg.c -o obj/intel64/generator_spgemm_csr_asparse_reg.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csr_asparse.c -o obj/intel64/generator_spgemm_csr_asparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csc_reader.c -o obj/intel64/generator_spgemm_csc_reader.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csc_bsparse.c -o obj/intel64/generator_spgemm_csc_bsparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm_csc_asparse.c -o obj/intel64/generator_spgemm_csc_asparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_spgemm.c -o obj/intel64/generator_spgemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_trsm_avx_avx512.c -o obj/intel64/generator_packed_trsm_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_trmm_avx_avx512.c -o obj/intel64/generator_packed_trmm_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_bsparse_avx_avx2_avx512.c -o obj/intel64/generator_packed_spgemm_csr_bsparse_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_bsparse_aarch64.c -o obj/intel64/generator_packed_spgemm_csr_bsparse_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_bsparse.c -o obj/intel64/generator_packed_spgemm_csr_bsparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_asparse_avx_avx2_avx512.c -o obj/intel64/generator_packed_spgemm_csr_asparse_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_asparse_aarch64.c -o obj/intel64/generator_packed_spgemm_csr_asparse_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csr_asparse.c -o obj/intel64/generator_packed_spgemm_csr_asparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csc_csparse_avx_avx2_avx512.c -o obj/intel64/generator_packed_spgemm_csc_csparse_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csc_csparse.c -o obj/intel64/generator_packed_spgemm_csc_csparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csc_bsparse_avx_avx2_avx512.c -o obj/intel64/generator_packed_spgemm_csc_bsparse_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csc_bsparse_aarch64.c -o obj/intel64/generator_packed_spgemm_csc_bsparse_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm_csc_bsparse.c -o obj/intel64/generator_packed_spgemm_csc_bsparse.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_spgemm.c -o obj/intel64/generator_packed_spgemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_getrf_avx_avx512.c -o obj/intel64/generator_packed_getrf_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_bc_rm_avx_avx2_avx512.c -o obj/intel64/generator_packed_gemm_bc_rm_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_bc_rm_aarch64.c -o obj/intel64/generator_packed_gemm_bc_rm_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_bc_rm.c -o obj/intel64/generator_packed_gemm_bc_rm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_avx_avx512.c -o obj/intel64/generator_packed_gemm_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_ac_rm_avx_avx2_avx512.c -o obj/intel64/generator_packed_gemm_ac_rm_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_ac_rm_aarch64.c -o obj/intel64/generator_packed_gemm_ac_rm_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed_gemm_ac_rm.c -o obj/intel64/generator_packed_gemm_ac_rm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_packed.c -o obj/intel64/generator_packed.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_unary_binary_avx_avx512.c -o obj/intel64/generator_mateltwise_unary_binary_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_transform_avx_avx512.c -o obj/intel64/generator_mateltwise_transform_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_scale_avx_avx512.c -o obj/intel64/generator_mateltwise_scale_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_relu_avx_avx512.c -o obj/intel64/generator_mateltwise_relu_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_reduce_avx_avx512.c -o obj/intel64/generator_mateltwise_reduce_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_dropout_avx_avx512.c -o obj/intel64/generator_mateltwise_dropout_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_cvtfp32bf16_act_avx_avx512.c -o obj/intel64/generator_mateltwise_cvtfp32bf16_act_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_copy_avx_avx512.c -o obj/intel64/generator_mateltwise_copy_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise_avx_avx512.c -o obj/intel64/generator_mateltwise_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_mateltwise.c -o obj/intel64/generator_mateltwise.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_matcopy_avx_avx512.c -o obj/intel64/generator_matcopy_avx_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_matcopy.c -o obj/intel64/generator_matcopy.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_sse_microkernel.c -o obj/intel64/generator_gemm_sse_microkernel.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_sse_avx_avx2_avx512.c -o obj/intel64/generator_gemm_sse_avx_avx2_avx512.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_noarch.c -o obj/intel64/generator_gemm_noarch.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_common_aarch64.c -o obj/intel64/generator_gemm_common_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_common.c -o obj/intel64/generator_gemm_common.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_avx_microkernel.c -o obj/intel64/generator_gemm_avx_microkernel.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_avx512_microkernel.c -o obj/intel64/generator_gemm_avx512_microkernel.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_avx2_microkernel.c -o obj/intel64/generator_gemm_avx2_microkernel.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_amx_microkernel_emu.c -o obj/intel64/generator_gemm_amx_microkernel_emu.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_amx_microkernel.c -o obj/intel64/generator_gemm_amx_microkernel.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_amx_emu.c -o obj/intel64/generator_gemm_amx_emu.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_amx.c -o obj/intel64/generator_gemm_amx.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm_aarch64.c -o obj/intel64/generator_gemm_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_gemm.c -o obj/intel64/generator_gemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_common_x86.c -o obj/intel64/generator_common_x86.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_common_aarch64.c -o obj/intel64/generator_common_aarch64.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_common.c -o obj/intel64/generator_common.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/generator_aarch64_instructions.c -o obj/intel64/generator_aarch64_instructions.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_cpuid_x86.c -o obj/intel64/libxsmm_cpuid_x86.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_generator.c -o obj/intel64/libxsmm_generator.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_trace.c -o obj/intel64/libxsmm_trace.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -DLIBXSMM_BUILD_EXT -fopenmp -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_ext.c -o obj/intel64/libxsmm_ext.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -DLIBXSMM_BUILD_EXT -fopenmp -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_ext_xcopy.c -o obj/intel64/libxsmm_ext_xcopy.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -DLIBXSMM_BUILD_EXT -fopenmp -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_ext_blocked_gemm.c -o obj/intel64/libxsmm_ext_blocked_gemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -DLIBXSMM_BUILD_EXT -fopenmp -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_ext_gemm.c -o obj/intel64/libxsmm_ext_gemm.o gcc -DNDEBUG -D__STATIC=1 -DLIBXSMM_OPENMP_SIMD -DLIBXSMM_BUILD=2 -Iinclude -I./src -msse4.2 -fPIC -Wall -O2 -fopenmp-simd -funroll-loops -ftree-vectorize -fdata-sections -ffunction-sections -fvisibility=hidden -pthread -Werror -c ./src/libxsmm_generator_gemm_driver.c -o obj/intel64/libxsmm_generator_gemm_driver.o /usr/bin/gcc-ar -rs lib/libxsmmgen.a obj/intel64/generator_x86_instructions.o obj/intel64/generator_transpose_avx_avx512.o obj/intel64/generator_transpose.o obj/intel64/generator_spgemm_csr_reader.o obj/intel64/generator_spgemm_csr_asparse_reg.o obj/intel64/generator_spgemm_csr_asparse.o obj/intel64/generator_spgemm_csc_reader.o obj/intel64/generator_spgemm_csc_bsparse.o obj/intel64/generator_spgemm_csc_asparse.o obj/intel64/generator_spgemm.o obj/intel64/generator_packed_trsm_avx_avx512.o obj/intel64/generator_packed_trmm_avx_avx512.o obj/intel64/generator_packed_spgemm_csr_bsparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csr_bsparse_aarch64.o obj/intel64/generator_packed_spgemm_csr_bsparse.o obj/intel64/generator_packed_spgemm_csr_asparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csr_asparse_aarch64.o obj/intel64/generator_packed_spgemm_csr_asparse.o obj/intel64/generator_packed_spgemm_csc_csparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csc_csparse.o obj/intel64/generator_packed_spgemm_csc_bsparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csc_bsparse_aarch64.o obj/intel64/generator_packed_spgemm_csc_bsparse.o obj/intel64/generator_packed_spgemm.o obj/intel64/generator_packed_getrf_avx_avx512.o obj/intel64/generator_packed_gemm_bc_rm_avx_avx2_avx512.o obj/intel64/generator_packed_gemm_bc_rm_aarch64.o obj/intel64/generator_packed_gemm_bc_rm.o obj/intel64/generator_packed_gemm_avx_avx512.o obj/intel64/generator_packed_gemm_ac_rm_avx_avx2_avx512.o obj/intel64/generator_packed_gemm_ac_rm_aarch64.o obj/intel64/generator_packed_gemm_ac_rm.o obj/intel64/generator_packed.o obj/intel64/generator_mateltwise_unary_binary_avx_avx512.o obj/intel64/generator_mateltwise_transform_avx_avx512.o obj/intel64/generator_mateltwise_scale_avx_avx512.o obj/intel64/generator_mateltwise_relu_avx_avx512.o obj/intel64/generator_mateltwise_reduce_avx_avx512.o obj/intel64/generator_mateltwise_dropout_avx_avx512.o obj/intel64/generator_mateltwise_cvtfp32bf16_act_avx_avx512.o obj/intel64/generator_mateltwise_copy_avx_avx512.o obj/intel64/generator_mateltwise_avx_avx512.o obj/intel64/generator_mateltwise.o obj/intel64/generator_matcopy_avx_avx512.o obj/intel64/generator_matcopy.o obj/intel64/generator_gemm_sse_microkernel.o obj/intel64/generator_gemm_sse_avx_avx2_avx512.o obj/intel64/generator_gemm_noarch.o obj/intel64/generator_gemm_common_aarch64.o obj/intel64/generator_gemm_common.o obj/intel64/generator_gemm_avx_microkernel.o obj/intel64/generator_gemm_avx512_microkernel.o obj/intel64/generator_gemm_avx2_microkernel.o obj/intel64/generator_gemm_amx_microkernel_emu.o obj/intel64/generator_gemm_amx_microkernel.o obj/intel64/generator_gemm_amx_emu.o obj/intel64/generator_gemm_amx.o obj/intel64/generator_gemm_aarch64.o obj/intel64/generator_gemm.o obj/intel64/generator_common_x86.o obj/intel64/generator_common_aarch64.o obj/intel64/generator_common.o obj/intel64/generator_aarch64_instructions.o obj/intel64/libxsmm_cpuid_x86.o obj/intel64/libxsmm_generator.o obj/intel64/libxsmm_trace.o /usr/lib64/gcc/x86_64-suse-linux/7/../../../../x86_64-suse-linux/bin/ar: creating lib/libxsmmgen.a gcc -o bin/libxsmm_gemm_generator obj/intel64/libxsmm_generator_gemm_driver.o -L/users/jenkg90/libxsmm/lib/ -lxsmmgen \ -Wl,--gc-sections -Wl,-z,relro,-z,now -lm -lrt -ldl -s -pthread /usr/bin/gcc-ar -rs lib/libxsmm.a obj/intel64/libxsmm_main.o obj/intel64/libxsmm_memory.o obj/intel64/libxsmm_malloc.o obj/intel64/libxsmm_hash.o obj/intel64/libxsmm_math.o obj/intel64/libxsmm_sync.o obj/intel64/libxsmm_python.o obj/intel64/libxsmm_mhd.o obj/intel64/libxsmm_timer.o obj/intel64/libxsmm_perf.o obj/intel64/libxsmm_gemm.o obj/intel64/libxsmm_xcopy.o obj/intel64/libxsmm_blocked_gemm.o obj/intel64/libxsmm_spmdm.o obj/intel64/libxsmm_fsspmdm.o obj/intel64/libxsmm_rng.o obj/intel64/libxsmm_dnn.o obj/intel64/libxsmm_dnn_tensor.o obj/intel64/libxsmm_dnn_convolution.o obj/intel64/libxsmm_dnn_elementwise.o obj/intel64/libxsmm_dnn_rnncell.o obj/intel64/libxsmm_dnn_rnncell_forward.o obj/intel64/libxsmm_dnn_rnncell_backward_weight_update.o obj/intel64/libxsmm_dnn_fusedbatchnorm.o obj/intel64/libxsmm_dnn_fusedbatchnorm_forward.o obj/intel64/libxsmm_dnn_fusedbatchnorm_backward.o obj/intel64/libxsmm_dnn_fusedgroupnorm.o obj/intel64/libxsmm_dnn_fusedgroupnorm_forward.o obj/intel64/libxsmm_dnn_fusedgroupnorm_backward.o obj/intel64/libxsmm_dnn_pooling.o obj/intel64/libxsmm_dnn_pooling_forward.o obj/intel64/libxsmm_dnn_pooling_backward.o obj/intel64/libxsmm_dnn_convolution_forward.o obj/intel64/libxsmm_dnn_fullyconnected.o obj/intel64/libxsmm_dnn_fullyconnected_forward.o obj/intel64/libxsmm_dnn_fullyconnected_backward_weight_update.o obj/intel64/libxsmm_dnn_convolution_backward.o obj/intel64/libxsmm_dnn_convolution_weight_update.o obj/intel64/libxsmm_dnn_softmaxloss.o obj/intel64/libxsmm_dnn_softmaxloss_forward.o obj/intel64/libxsmm_dnn_softmaxloss_backward.o obj/intel64/libxsmm_dnn_optimizer.o obj/intel64/libxsmm_dnn_optimizer_sgd.o obj/intel64/generator_x86_instructions.o obj/intel64/generator_transpose_avx_avx512.o obj/intel64/generator_transpose.o obj/intel64/generator_spgemm_csr_reader.o obj/intel64/generator_spgemm_csr_asparse_reg.o obj/intel64/generator_spgemm_csr_asparse.o obj/intel64/generator_spgemm_csc_reader.o obj/intel64/generator_spgemm_csc_bsparse.o obj/intel64/generator_spgemm_csc_asparse.o obj/intel64/generator_spgemm.o obj/intel64/generator_packed_trsm_avx_avx512.o obj/intel64/generator_packed_trmm_avx_avx512.o obj/intel64/generator_packed_spgemm_csr_bsparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csr_bsparse_aarch64.o obj/intel64/generator_packed_spgemm_csr_bsparse.o obj/intel64/generator_packed_spgemm_csr_asparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csr_asparse_aarch64.o obj/intel64/generator_packed_spgemm_csr_asparse.o obj/intel64/generator_packed_spgemm_csc_csparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csc_csparse.o obj/intel64/generator_packed_spgemm_csc_bsparse_avx_avx2_avx512.o obj/intel64/generator_packed_spgemm_csc_bsparse_aarch64.o obj/intel64/generator_packed_spgemm_csc_bsparse.o obj/intel64/generator_packed_spgemm.o obj/intel64/generator_packed_getrf_avx_avx512.o obj/intel64/generator_packed_gemm_bc_rm_avx_avx2_avx512.o obj/intel64/generator_packed_gemm_bc_rm_aarch64.o obj/intel64/generator_packed_gemm_bc_rm.o obj/intel64/generator_packed_gemm_avx_avx512.o obj/intel64/generator_packed_gemm_ac_rm_avx_avx2_avx512.o obj/intel64/generator_packed_gemm_ac_rm_aarch64.o obj/intel64/generator_packed_gemm_ac_rm.o obj/intel64/generator_packed.o obj/intel64/generator_mateltwise_unary_binary_avx_avx512.o obj/intel64/generator_mateltwise_transform_avx_avx512.o obj/intel64/generator_mateltwise_scale_avx_avx512.o obj/intel64/generator_mateltwise_relu_avx_avx512.o obj/intel64/generator_mateltwise_reduce_avx_avx512.o obj/intel64/generator_mateltwise_dropout_avx_avx512.o obj/intel64/generator_mateltwise_cvtfp32bf16_act_avx_avx512.o obj/intel64/generator_mateltwise_copy_avx_avx512.o obj/intel64/generator_mateltwise_avx_avx512.o obj/intel64/generator_mateltwise.o obj/intel64/generator_matcopy_avx_avx512.o obj/intel64/generator_matcopy.o obj/intel64/generator_gemm_sse_microkernel.o obj/intel64/generator_gemm_sse_avx_avx2_avx512.o obj/intel64/generator_gemm_noarch.o obj/intel64/generator_gemm_common_aarch64.o obj/intel64/generator_gemm_common.o obj/intel64/generator_gemm_avx_microkernel.o obj/intel64/generator_gemm_avx512_microkernel.o obj/intel64/generator_gemm_avx2_microkernel.o obj/intel64/generator_gemm_amx_microkernel_emu.o obj/intel64/generator_gemm_amx_microkernel.o obj/intel64/generator_gemm_amx_emu.o obj/intel64/generator_gemm_amx.o obj/intel64/generator_gemm_aarch64.o obj/intel64/generator_gemm.o obj/intel64/generator_common_x86.o obj/intel64/generator_common_aarch64.o obj/intel64/generator_common.o obj/intel64/generator_aarch64_instructions.o obj/intel64/libxsmm_cpuid_x86.o obj/intel64/libxsmm_generator.o obj/intel64/libxsmm_trace.o /usr/lib64/gcc/x86_64-suse-linux/7/../../../../x86_64-suse-linux/bin/ar: creating lib/libxsmm.a /usr/bin/gcc-ar -rs lib/libxsmmf.a obj/intel64/libxsmm-mod.o /usr/bin/gcc-ar -rs lib/libxsmmext.a obj/intel64/libxsmm_ext.o obj/intel64/libxsmm_ext_xcopy.o obj/intel64/libxsmm_ext_blocked_gemm.o obj/intel64/libxsmm_ext_gemm.o /usr/lib64/gcc/x86_64-suse-linux/7/../../../../x86_64-suse-linux/bin/ar: creating lib/libxsmmext.a /usr/lib64/gcc/x86_64-suse-linux/7/../../../../x86_64-suse-linux/bin/ar: creating lib/libxsmmf.a ================================================================================ LIBXSMM master-1.16.1-1085 (Linux@nid00563) -------------------------------------------------------------------------------- GNU Compiler Collection: gcc 8.3.0, g++ 8.3.0, and gfortran 8.3.0 C / C++ target: -msse4.2 Fortran target: -msse4.2 -------------------------------------------------------------------------------- + umask 0002 + mkdir -p /scratch/snx3000/jenkg90/jenkins-g90-DBCSR-691.ocl + chmod 0775 /scratch/snx3000/jenkg90/jenkins-g90-DBCSR-691.ocl + cd /scratch/snx3000/jenkg90/jenkins-g90-DBCSR-691.ocl + export NVSDKCOMPUTE_ROOT=/opt/nvidia/cudatoolkit10.2/10.2.89_3.28-7.0.2.1_2.17__g52c0314 + NVSDKCOMPUTE_ROOT=/opt/nvidia/cudatoolkit10.2/10.2.89_3.28-7.0.2.1_2.17__g52c0314 + export PKG_CONFIG_PATH=/users/jenkg90/libxsmm/lib:/opt/nvidia/cudatoolkit10.2/10.2.89_3.28-7.0.2.1_2.17__g52c0314/lib64/pkgconfig:/opt/cray/rca/2.2.20-7.0.2.1_2.27__g8e3fb5b.ari/lib64/pkgconfig:/opt/cray/pe/pmi/5.0.16/lib64/pkgconfig:/opt/cray/pe/craype/2.7.0/pkg-config:/opt/cray/pe/iobuf/2.0.10/lib/pkgconfig:/opt/cray/pe/fftw/2.1.5.9/lib/pkgconfig:/opt/cray/cge/3.2.1463_r03f4dfb_fe3.3.0_2019062614/lib/pkgconfig:/opt/cray/pe/atp/3.7.4/lib/pkgconfig + PKG_CONFIG_PATH=/users/jenkg90/libxsmm/lib:/opt/nvidia/cudatoolkit10.2/10.2.89_3.28-7.0.2.1_2.17__g52c0314/lib64/pkgconfig:/opt/cray/rca/2.2.20-7.0.2.1_2.27__g8e3fb5b.ari/lib64/pkgconfig:/opt/cray/pe/pmi/5.0.16/lib64/pkgconfig:/opt/cray/pe/craype/2.7.0/pkg-config:/opt/cray/pe/iobuf/2.0.10/lib/pkgconfig:/opt/cray/pe/fftw/2.1.5.9/lib/pkgconfig:/opt/cray/cge/3.2.1463_r03f4dfb_fe3.3.0_2019062614/lib/pkgconfig:/opt/cray/pe/atp/3.7.4/lib/pkgconfig + tee -a build.out ++ command -v srun /var/spool/slurmd/job29142323/slurm_script: line 40: SLURM_NTASKS: unbound variable