From cf26e5faf57bc57e1c58e0886092baf047990761 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 4 Sep 2019 19:16:32 -0700 Subject: [PATCH] Use transform function on llvm::Module in the ExecutionEngine The refactoring of ExecutionEngine dropped the usage of the irTransform function used to pass -O3 and other options to LLVM. As a consequence, the proper optimizations do not kick in in LLMV-land. This CL makes use of the transform function and allows producing avx512 instructions, on an internal example, when using: `mlir-cpu-runner -dump-object-file=1 -object-filename=foo.o` combined with `objdump -D foo.o`. Assembly produced resembles: ``` 2b2e: 62 72 7d 48 18 04 0e vbroadcastss (%rsi,%rcx,1),%zmm8 2b35: 62 71 7c 48 28 ce vmovaps %zmm6,%zmm9 2b3b: 62 72 3d 48 a8 c9 vfmadd213ps %zmm1,%zmm8,%zmm9 2b41: 62 f1 7c 48 28 cf vmovaps %zmm7,%zmm1 2b47: 62 f2 3d 48 a8 c8 vfmadd213ps %zmm0,%zmm8,%zmm1 2b4d: 62 f2 7d 48 18 44 0e vbroadcastss 0x4(%rsi,%rcx,1),%zmm0 2b54: 01 2b55: 62 71 7c 48 28 c6 vmovaps %zmm6,%zmm8 2b5b: 62 72 7d 48 a8 c3 vfmadd213ps %zmm3,%zmm0,%zmm8 2b61: 62 f1 7c 48 28 df vmovaps %zmm7,%zmm3 2b67: 62 f2 7d 48 a8 da vfmadd213ps %zmm2,%zmm0,%zmm3 2b6d: 62 f2 7d 48 18 44 0e vbroadcastss 0x8(%rsi,%rcx,1),%zmm0 2b74: 02 2b75: 62 f2 7d 48 a8 f5 vfmadd213ps %zmm5,%zmm0,%zmm6 2b7b: 62 f2 7d 48 a8 fc vfmadd213ps %zmm4,%zmm0,%zmm7 ``` etc. Fixes tensorflow/mlir#120 PiperOrigin-RevId: 267281097 --- mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index 08b808674a05..f991d7dff17e 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -277,6 +277,9 @@ Expected> ExecutionEngine::create( // Add a ThreadSafemodule to the engine and return. ThreadSafeModule tsm(std::move(deserModule), std::move(ctx)); + if (transformer) + cantFail(tsm.withModuleDo( + [&](llvm::Module &module) { return transformer(&module); })); cantFail(jit->addIRModule(std::move(tsm))); engine->jit = std::move(jit);