84 lines
2.8 KiB
Python
84 lines
2.8 KiB
Python
def get_blas_gomp_arch_deps():
    """Build the per-architecture dependency table.

    Returns:
        A list of `(architecture, deps)` tuples, where `deps` is a list of
        target labels. The "x86_64" entry holds a single
        `third-party//IntelComposerXE` target whose name is read from the
        `fbcode.mkl_lp64` config value (falling back to "mkl_lp64_omp");
        the "aarch64" entry holds the OpenBLAS and omp targets.
    """
    mkl_target_name = native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp")

    x86_64_deps = [
        "third-party//IntelComposerXE:{}".format(mkl_target_name),
    ]
    aarch64_deps = [
        "third-party//OpenBLAS:OpenBLAS",
        "third-party//openmp:omp",
    ]

    return [
        ("x86_64", x86_64_deps),
        ("aarch64", aarch64_deps),
    ]
|
|
|
|
# Default compiler flags, assembled in order from: warning switches,
# preprocessor definitions, a platform-dependent XNNPACK define, and an
# optional "-O1" driven by the `fbcode.build_mode_test_label` config value.
default_compiler_flags = (
    # Warning configuration.
    [
        "-Wall",
        "-Wextra",
        "-Wno-unused-function",
        "-Wno-unused-parameter",
        "-Wno-error=strict-aliasing",
        "-Wno-shadow-compatible-local",
        "-Wno-maybe-uninitialized",  # aten is built with gcc as part of HHVM
        "-Wno-unknown-pragmas",
        "-Wno-strict-overflow",
        # See https://fb.facebook.com/groups/fbcode/permalink/1813348245368673/
        # These trigger on platform007
        "-Wno-stringop-overflow",
        "-Wno-class-memaccess",
    ] +
    # Preprocessor definitions.
    [
        "-DHAVE_MMAP",
        "-DUSE_GCC_ATOMICS=1",
        "-D_FILE_OFFSET_BITS=64",
        "-DHAVE_SHM_OPEN=1",
        "-DHAVE_SHM_UNLINK=1",
        "-DHAVE_MALLOC_USABLE_SIZE=1",
        "-DCPU_CAPABILITY_DEFAULT",
        "-DTH_INDEX_BASE=0",
        "-DMAGMA_V2",
        "-DNO_CUDNN_DESTROY_HANDLE",
        "-DUSE_EXPERIMENTAL_CUDNN_V8_API",  # enable cudnn v8 api
        "-DUSE_FBGEMM",
        "-DUSE_QNNPACK",
        "-DUSE_PYTORCH_QNNPACK",
        # The dynamically loaded NVRTC trick doesn't work in fbcode,
        # and it's not necessary anyway, because we have a stub
        # nvrtc library which we load canonically anyway
        "-DUSE_DIRECT_NVRTC",
        "-DUSE_RUY_QMATMUL",
    ] +
    select({
        # XNNPACK depends on an updated version of pthreadpool interface, whose implementation
        # includes <pthread.h> - a header not available on Windows.
        "DEFAULT": ["-DUSE_XNNPACK"],
        "ovr_config//os:windows": [],
    }) +
    # "-O1" is appended only when the build mode test label is "dev-nosan".
    (
        ["-O1"] if native.read_config("fbcode", "build_mode_test_label", "") == "dev-nosan" else []
    )
)
|
|
|
|
# Additional warning flags keyed by compiler name ("clang" / "gcc").
# NOTE(review): how the key is chosen is decided by callers not visible here.
compiler_specific_flags = dict(
    clang = [
        "-Wno-absolute-value",
        "-Wno-pass-failed",
        "-Wno-braced-scalar-init",
    ],
    gcc = [
        "-Wno-error=array-bounds",
    ],
)
|
|
|
|
def get_cpu_parallel_backend_flags():
    """Compute the `-D` defines selected by the parallel-backend config values.

    Reads three config values:
      * `pytorch.parallel_backend` (default "openmp") picks exactly one
        backend define; any value other than "openmp", "tbb" or "native"
        aborts via `fail`.
      * `pytorch.exp_single_thread_pool` (default "0") adds the experimental
        single-thread-pool define when set to "1".
      * `fbcode.mkl_lp64` (default "mkl_lp64_omp") adds the sequential-MKL
        define when set to "mkl_lp64_seq".

    Returns:
        A list of define flags, backend define first.
    """
    define_by_backend = {
        "openmp": "-DAT_PARALLEL_OPENMP_FBCODE=1",
        "tbb": "-DAT_PARALLEL_NATIVE_TBB_FBCODE=1",
        "native": "-DAT_PARALLEL_NATIVE_FBCODE=1",
    }

    backend = native.read_config("pytorch", "parallel_backend", "openmp")
    if backend not in define_by_backend:
        fail("Unsupported parallel backend: " + backend)

    flags = [define_by_backend[backend]]

    if native.read_config("pytorch", "exp_single_thread_pool", "0") == "1":
        flags.append("-DAT_EXPERIMENTAL_SINGLE_THREAD_POOL=1")

    if native.read_config("fbcode", "mkl_lp64", "mkl_lp64_omp") == "mkl_lp64_seq":
        flags.append("-DATEN_MKL_SEQUENTIAL_FBCODE=1")

    return flags
|
|
|
|
def is_cpu_static_dispatch_build():
    """Report whether `fbcode.caffe2_static_dispatch_mode` is set to "cpu".

    The config value defaults to "none" when unset, in which case this
    returns False.
    """
    return native.read_config("fbcode", "caffe2_static_dispatch_mode", "none") == "cpu"
|