[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Minios-devel] [UNIKRAFT/LIBDNNL PATCH 2/2] Add Makefile.uk and patches
Add dnnl port Makefile.uk and corresponding patches Signed-off-by: Felipe Huici <felipe.huici@xxxxxxxxx> --- Makefile.uk | 321 ++++++++++++++++++ include/cpuid.h | 158 +++++++++ include/dnnl_config.h | 84 +++++ include/dnnl_version.h | 36 ++ ...001-prevent-clobbering-barrier-macro.patch | 11 + patches/0002-prevent-clobbering-B-macro.patch | 11 + .../0003-prevent-clobbering-nop-macro.patch | 11 + 7 files changed, 632 insertions(+) create mode 100644 Makefile.uk create mode 100644 include/cpuid.h create mode 100644 include/dnnl_config.h create mode 100644 include/dnnl_version.h create mode 100644 patches/0001-prevent-clobbering-barrier-macro.patch create mode 100644 patches/0002-prevent-clobbering-B-macro.patch create mode 100644 patches/0003-prevent-clobbering-nop-macro.patch diff --git a/Makefile.uk b/Makefile.uk new file mode 100644 index 0000000..bb25ee6 --- /dev/null +++ b/Makefile.uk @@ -0,0 +1,321 @@ +# libdnnl Makefile.uk +# +# Authors: Felipe Huici <felipe.huici@xxxxxxxxx> +# +# Copyright (c) 2020, NEC Europe Ltd., NEC Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY. +# + +################################################################################ +# Library registration +################################################################################ +$(eval $(call addlib_s,libdnnl,$(CONFIG_LIBDNNL))) + +ifeq ($(CONFIG_LIBDNNL),y) +ifneq ($(CONFIG_LIBCOMPILER_RT),y) +$(error Require libcompiler_rt) +endif +endif + +################################################################################ +# Sources +################################################################################ +LIBDNNL_VERSION=1.2 +LIBDNNL_URL=https://github.com/intel/mkl-dnn/archive/v$(LIBDNNL_VERSION).zip +LIBDNNL_PATCHDIR=$(LIBDNNL_BASE)/patches +$(eval $(call fetch,libdnnl,$(LIBDNNL_URL))) +$(eval $(call patch,libdnnl,$(LIBDNNL_PATCHDIR),mkl-dnn-$(LIBDNNL_VERSION))) + +################################################################################ +# Helpers +################################################################################ +LIBDNNL_SUBDIR=mkl-dnn-$(LIBDNNL_VERSION) +LIBDNNL_SRC=$(LIBDNNL_ORIGIN)/$(LIBDNNL_SUBDIR) + 
+################################################################################ +# Library includes +################################################################################ +LIBMKL-INCLUDES += -I$(LIBDNNL_SRC)/include \ + -I$(LIBDNNL_SRC)/src \ + -I$(LIBDNNL_SRC)/src/cpu \ + -I$(LIBDNNL_SRC)/src/cpu/gemm \ + -I$(LIBDNNL_SRC)/src/common \ + -I$(LIBDNNL_BASE)/include \ + +CINCLUDES-$(CONFIG_LIBDNNL) += $(LIBMKL-INCLUDES) +CXXINCLUDES-$(CONFIG_LIBDNNL) += $(LIBMKL-INCLUDES) + +################################################################################ +# Global flags +################################################################################ +LIBDNNL-CONFIG_FLAGS = -DCMAKE_BUILD_TYPE=Release \ + -DDNNL_ENABLE_CONCURRENT_EXEC=OFF \ + -DDNNL_LIBRARY_TYPE=STATIC \ + -DDNNL_THREADING=OMP:COMP \ + -DDNNL_USE_MKL=NONE \ + -DDNNL_VERBOSE=ON \ + -DWITH_EXAMPLE=OFF \ + -DWITH_TEST=OFF \ + -DDNNL_ENABLE_JIT_PROFILING=OFF + +LIBDNNL_CFLAGS-y += $(LIBDNNL-CONFIG_FLAGS) +LIBDNNL_CXXFLAGS-y += $(LIBDNNL-CONFIG_FLAGS) + +################################################################################ +# Suppress Flags +################################################################################ +LIBDNNL_SUPPRESS_FLAGS-y += -Wno-unused-parameter \ + -Wno-unused-but-set-parameter \ + -Wno-unknown-pragmas \ + -Wno-extra \ + -Wno-attributes \ + -Wno-parentheses \ + -fcompare-debug-second # suppress notes + +LIBDNNL_CFLAGS-y += $(LIBDNNL_SUPPRESS_FLAGS-y) -Wno-implicit-function-declaration +LIBDNNL_CXXFLAGS-y += $(LIBDNNL_SUPPRESS_FLAGS-y) + +################################################################################ +# Sources +################################################################################ +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/dnnl_debug.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/scratchpad.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/batch_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/binary.cpp 
+LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/lrn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/shuffle.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive_exec_types.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/memory.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/inner_product.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/verbose.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive_attr.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive_iterator.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/engine.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/query.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/reorder.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/sum.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/convolution_pd.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/memory_desc_wrapper.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/rnn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/matmul.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/resampling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/layer_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/concat.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive_hashing.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/dnnl_debug_autogenerated.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/deconvolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/softmax.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/stream.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/primitive_desc.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/eltwise.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/common/memory_zero_pad.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_tbb_batch_normalization.cpp +LIBDNNL_SRCS-y += 
$(LIBDNNL_SRC)/src/cpu/jit_avx512_core_f32_wino_conv_4x3.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/nchw_pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_dw_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_batch_normalization_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_batch_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_eltwise.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_shuffle.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_reorder.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_reorder_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_concat.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_batch_normalization_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_batch_normalization_s8.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_layer_normalization_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_reducer.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_eltwise.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/nspc_batch_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_lrn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_f32_wino_conv_2x3.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_x8s8s32x_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_convolution_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_eltwise_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_layer_normalization.cpp +LIBDNNL_SRCS-y += 
$(LIBDNNL_SRC)/src/cpu/jit_sse41_1x1_conv_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_lrn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_dw_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_sse41_conv_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_x8s8s32x_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_sum.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_shuffle_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_eltwise_injector.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_deconvolution_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_batch_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_barrier.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_bf16_inner_product.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_x8s8s32x_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_x8s8s32x_inner_product.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/bfloat16.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_f32_wino_conv_4x3_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_bf16_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_1x1_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/simple_concat.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_sse41_convolution.cpp +LIBDNNL_SRCS-y += 
$(LIBDNNL_SRC)/src/cpu/cpu_convolution_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_softmax_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_transpose_src_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_bf16_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/nhwc_pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_pooling_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_reorder.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_x8s8s32x_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_inner_product_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/simple_sum.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_sse41_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_i8i8_pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_convolution_winograd.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_inner_product.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_lrn_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ncsp_batch_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_conv_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_softmax.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_softmax.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_pooling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_dw_conv_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_1x1_conv_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_1x1_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_inner_product_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_x8s8s32x_1x1_conv_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_layer_normalization.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx2_x8s8s32x_convolution.cpp +LIBDNNL_SRCS-y += 
$(LIBDNNL_SRC)/src/cpu/jit_avx512_common_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_avx512_common_1x1_convolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_isa_traits.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_deconvolution.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_sum.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/cpu_engine.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_pool_kernel.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm_inner_product.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/ref_lrn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_uni_lrn_kernel_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/resampling/simple_resampling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/resampling/ref_resampling.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/resampling/cpu_resampling_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/binary/cpu_binary_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/binary/jit_uni_binary.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/binary/ref_binary.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/binary/jit_uni_i8i8_binary.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/matmul/gemm_x8s8s32x_matmul.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/matmul/cpu_matmul_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/matmul/gemm_bf16_matmul.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/matmul/gemm_f32_matmul.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/matmul/ref_matmul.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/rnn_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/cell_gru_lbr.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/ref_rnn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/ref_postgemm_rnn.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/cpu_rnn_list.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/ref_postgemm_lstm.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/ref_postgemm_gru_lbr.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/ref_postgemm_gru.cpp 
+LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/cell_gru.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/rnn/cell_common.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_utils/jit_utils.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_utils/jitprofiling/jitprofiling.c +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/jit_utils/linux_perf/linux_perf.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/gemm_info.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/gemm_pack.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/gemm_driver.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/gemm.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/gemv_driver.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8x8s32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_sum_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_sum_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_sum_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_gemv_s8x8s32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_at_kern.cpp 
+LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_gemm_s8u8s32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx2_u8_copy_sum_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/simple_gemm_s8s8s32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/gemm_utils_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx512_core_f32_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx2_f32_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx2_f32_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_f32_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_f32_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_f32_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx2_kernel_sgemm_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx512_core_f32_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_kernel_sgemm_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_f32_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx2_f32_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_gemv_t_f32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_gemv_t_f32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_f32_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_f32_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_f32_copy_at_kern.cpp 
+LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx512_core_f32_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_gemm_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx2_f32_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_kernel_b0_sgemm_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_kernel_b0_sgemm_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/ref_gemm_f32.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_sse41_f32_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx512_core_f32_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/f32/jit_avx_kernel_sgemm_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/bf16/jit_avx512_core_s16_copy_an_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/bf16/jit_avx512_core_s16_copy_bt_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/bf16/jit_avx512_core_s16_copy_at_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/bf16/jit_avx512_core_gemm_bf16bf16f32_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/cpu/gemm/bf16/jit_avx512_core_s16_copy_bn_kern.cpp +LIBDNNL_SRCS-y += $(LIBDNNL_SRC)/src/compute/dispatch.cpp + diff --git a/include/cpuid.h b/include/cpuid.h new file mode 100644 index 0000000..cb75258 --- /dev/null +++ b/include/cpuid.h @@ -0,0 +1,158 @@ +/* Taken from FreeBSD */ + +/*===---- cpuid.h - X86 cpu model detection --------------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do 
so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !(__x86_64__ || __i386__) +#error this header is for x86 only +#endif + +/* Features in %ecx for level 1 */ +#define bit_SSE3 0x00000001 +#define bit_PCLMULQDQ 0x00000002 +#define bit_DTES64 0x00000004 +#define bit_MONITOR 0x00000008 +#define bit_DSCPL 0x00000010 +#define bit_VMX 0x00000020 +#define bit_SMX 0x00000040 +#define bit_EIST 0x00000080 +#define bit_TM2 0x00000100 +#define bit_SSSE3 0x00000200 +#define bit_CNXTID 0x00000400 +#define bit_FMA 0x00001000 +#define bit_CMPXCHG16B 0x00002000 +#define bit_xTPR 0x00004000 +#define bit_PDCM 0x00008000 +#define bit_PCID 0x00020000 +#define bit_DCA 0x00040000 +#define bit_SSE41 0x00080000 +#define bit_SSE42 0x00100000 +#define bit_x2APIC 0x00200000 +#define bit_MOVBE 0x00400000 +#define bit_POPCNT 0x00800000 +#define bit_TSCDeadline 0x01000000 +#define bit_AESNI 0x02000000 +#define bit_XSAVE 0x04000000 +#define bit_OSXSAVE 0x08000000 +#define bit_AVX 0x10000000 +#define bit_RDRAND 0x40000000 + +/* Features in %edx for level 1 */ +#define bit_FPU 0x00000001 +#define bit_VME 0x00000002 +#define bit_DE 0x00000004 +#define bit_PSE 0x00000008 +#define bit_TSC 0x00000010 +#define bit_MSR 0x00000020 +#define bit_PAE 0x00000040 +#define bit_MCE 0x00000080 +#define 
bit_CX8 0x00000100 +#define bit_APIC 0x00000200 +#define bit_SEP 0x00000800 +#define bit_MTRR 0x00001000 +#define bit_PGE 0x00002000 +#define bit_MCA 0x00004000 +#define bit_CMOV 0x00008000 +#define bit_PAT 0x00010000 +#define bit_PSE36 0x00020000 +#define bit_PSN 0x00040000 +#define bit_CLFSH 0x00080000 +#define bit_DS 0x00200000 +#define bit_ACPI 0x00400000 +#define bit_MMX 0x00800000 +#define bit_FXSR 0x01000000 +#define bit_SSE 0x02000000 +#define bit_SSE2 0x04000000 +#define bit_SS 0x08000000 +#define bit_HTT 0x10000000 +#define bit_TM 0x20000000 +#define bit_PBE 0x80000000 + +/* Features in %ebx for level 7 sub-leaf 0 */ +#define bit_FSGSBASE 0x00000001 +#define bit_SMEP 0x00000080 +#define bit_ENH_MOVSB 0x00000200 + +/* PIC on i386 uses %ebx, so preserve it. */ +#if __i386__ +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm(" pushl %%ebx\n" \ + " cpuid\n" \ + " mov %%ebx,%1\n" \ + " popl %%ebx" \ + : "=a"(__eax), "=r" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#else +#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level)) + +#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx) \ + __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ + : "0"(__level), "2"(__count)) +#endif + +static __inline int __get_cpuid (unsigned int __level, unsigned int *__eax, + unsigned int *__ebx, unsigned int *__ecx, + unsigned int *__edx) { + __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx); + return 1; +} + +static __inline int __get_cpuid_max (unsigned int __level, unsigned int *__sig) +{ + unsigned int __eax, __ebx, __ecx, __edx; +#if __i386__ + int __cpuid_supported; + + __asm(" pushfl\n" + " 
popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r" (__cpuid_supported) : : "eax", "ecx"); + if (!__cpuid_supported) + return 0; +#endif + + __cpuid(__level, __eax, __ebx, __ecx, __edx); + if (__sig) + *__sig = __ebx; + return __eax; +} diff --git a/include/dnnl_config.h b/include/dnnl_config.h new file mode 100644 index 0000000..a4d58b0 --- /dev/null +++ b/include/dnnl_config.h @@ -0,0 +1,84 @@ +/******************************************************************************* +* Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#ifndef DNNL_CONFIG_H +#define DNNL_CONFIG_H + +#include "dnnl_types.h" + +/// @cond DO_NOT_DOCUMENT_THIS + +// All symbols shall be internal unless marked as DNNL_API +#if defined _WIN32 || defined __CYGWIN__ +#define DNNL_HELPER_DLL_IMPORT __declspec(dllimport) +#define DNNL_HELPER_DLL_EXPORT __declspec(dllexport) +#else +#if __GNUC__ >= 4 +#define DNNL_HELPER_DLL_IMPORT __attribute__((visibility("default"))) +#define DNNL_HELPER_DLL_EXPORT __attribute__((visibility("default"))) +#else +#define DNNL_HELPER_DLL_IMPORT +#define DNNL_HELPER_DLL_EXPORT +#endif +#endif + +#ifdef DNNL_DLL +#ifdef DNNL_DLL_EXPORTS +#define DNNL_API DNNL_HELPER_DLL_EXPORT +#else +#define DNNL_API DNNL_HELPER_DLL_IMPORT +#endif +#else +#define DNNL_API +#endif + +#if defined(__GNUC__) +#define DNNL_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +#define DNNL_DEPRECATED __declspec(deprecated) +#else +#define DNNL_DEPRECATED +#endif + +/// @endcond + +// clang-format off + +// DNNL CPU threading runtime +#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_SEQ + +// DNNL CPU engine runtime +#define DNNL_CPU_RUNTIME DNNL_RUNTIME_SEQ + +// DNNL GPU engine runtime +#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE + +// clang-format on + +#if defined(DNNL_CPU_RUNTIME) && defined(DNNL_GPU_RUNTIME) +#if (DNNL_CPU_RUNTIME == DNNL_RUNTIME_NONE) \ + || (DNNL_CPU_RUNTIME == DNNL_RUNTIME_OCL) +#error "Unexpected DNNL_CPU_RUNTIME" +#endif +#if (DNNL_GPU_RUNTIME != DNNL_RUNTIME_NONE) \ + && (DNNL_GPU_RUNTIME != DNNL_RUNTIME_OCL) +#error "Unexpected DNNL_GPU_RUNTIME" +#endif +#else +#error "BOTH DNNL_CPU_RUNTIME and DNNL_GPU_RUNTIME must be defined" +#endif + +#endif diff --git a/include/dnnl_version.h b/include/dnnl_version.h new file mode 100644 index 0000000..57715b8 --- /dev/null +++ b/include/dnnl_version.h @@ -0,0 +1,36 @@ +/******************************************************************************* +* 
Copyright 2019 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifndef DNNL_VERSION_H +#define DNNL_VERSION_H + +// clang-format off + +/// Major version +#define DNNL_VERSION_MAJOR 1 + +/// Minor version +#define DNNL_VERSION_MINOR 2 + +/// Patch version +#define DNNL_VERSION_PATCH 0 + +/// Git commit hash +#define DNNL_VERSION_HASH "N/A" + +// clang-format on + +#endif diff --git a/patches/0001-prevent-clobbering-barrier-macro.patch b/patches/0001-prevent-clobbering-barrier-macro.patch new file mode 100644 index 0000000..5a44492 --- /dev/null +++ b/patches/0001-prevent-clobbering-barrier-macro.patch @@ -0,0 +1,11 @@ +--- a/src/cpu/cpu_barrier.hpp 2020-02-01 22:15:52.999902062 +0100 ++++ b/src/cpu/cpu_barrier.hpp 2020-02-01 22:16:03.487787545 +0100 +@@ -22,6 +22,8 @@ + #include "jit_generator.hpp" + #include "utils.hpp" + ++#undef barrier ++ + namespace dnnl { + namespace impl { + namespace cpu { diff --git a/patches/0002-prevent-clobbering-B-macro.patch b/patches/0002-prevent-clobbering-B-macro.patch new file mode 100644 index 0000000..0ae33b0 --- /dev/null +++ b/patches/0002-prevent-clobbering-B-macro.patch @@ -0,0 +1,11 @@ +--- a/src/common/tag_traits.hpp 2020-02-02 18:20:18.349160812 +0100 ++++ b/src/common/tag_traits.hpp 2020-02-02 18:20:32.421006289 +0100 +@@ -22,6 +22,8 @@ + #include "c_types_map.hpp" + #include "utils.hpp" + ++#undef _B ++ + namespace dnnl { 
+ namespace impl { + diff --git a/patches/0003-prevent-clobbering-nop-macro.patch b/patches/0003-prevent-clobbering-nop-macro.patch new file mode 100644 index 0000000..27f7267 --- /dev/null +++ b/patches/0003-prevent-clobbering-nop-macro.patch @@ -0,0 +1,11 @@ +--- a/src/cpu/xbyak/xbyak.h 2020-02-04 15:12:32.586176051 +0100 ++++ b/src/cpu/xbyak/xbyak.h 2020-02-04 15:12:42.758058038 +0100 +@@ -72,6 +72,8 @@ + #include <iostream> + #endif + ++#undef nop ++ + // #define XBYAK_DISABLE_AVX512 + + //#define XBYAK_USE_MMAP_ALLOCATOR -- 2.20.1 _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |