llvm-project/compiler-rt/test/fuzzer/dataflow.test

# Tests the data flow tracer.
REQUIRES: linux, x86_64

# Disable, like other dataflow tests.
RUN: false
XFAIL: *

# Build the tracer and the test.
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o  %t-DataFlow.o
RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fPIC                %S/../../lib/fuzzer/dataflow/DataFlowCallbacks.cpp -o  %t-DataFlowCallbacks.o
RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/ThreeFunctionsTest.cpp     %t-DataFlow*.o -o %t-ThreeFunctionsTestDF
RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/Labels20Test.cpp     %t-DataFlow*.o -o %t-Labels20TestDF
RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest

# Dump the function list.
RUN:  %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST
FUNC_LIST-DAG: LLVMFuzzerTestOneInput
FUNC_LIST-DAG: Func1
FUNC_LIST-DAG: Func2

# Prepare the inputs.
RUN: rm -rf %t/IN %t/IN20
RUN: mkdir -p %t/IN %t/IN20
RUN: echo -n ABC    > %t/IN/ABC
RUN: echo -n FUABC  > %t/IN/FUABC
RUN: echo -n FUZZR  > %t/IN/FUZZR
RUN: echo -n FUZZM  > %t/IN/FUZZM
RUN: echo -n FUZZMU > %t/IN/FUZZMU
RUN: echo -n 1234567890123456 > %t/IN/1234567890123456

RUN: echo -n FUZZxxxxxxxxxxxxxxxx > %t/IN20/FUZZxxxxxxxxxxxxxxxx
RUN: echo -n FUZZxxxxxxxxxxxxMxxx > %t/IN20/FUZZxxxxxxxxxxxxMxxx
RUN: echo -n FUZxxxxxxxxxxxxxxxxx > %t/IN20/FUZxxxxxxxxxxxxxxxxx
RUN: echo -n FUxxxxxxxxxxxxxxxxxx > %t/IN20/FUxxxxxxxxxxxxxxxxxx


RUN: export DFSAN_OPTIONS=warn_unimplemented=0

# This test assumes that the functions in ThreeFunctionsTestDF are instrumented
# in a specific order:
# LLVMFuzzerTestOneInput: F0
# Func1: F1
# Func2: F2

# ABC: No data is used
RUN:%t-ThreeFunctionsTestDF %t/IN/ABC    | FileCheck %s --check-prefix=IN_ABC
IN_ABC-NOT: F0
IN_ABC: C0
IN_ABC-NOT: C

# FUABC: First 3 bytes are checked, Func1/Func2 are not called.
RUN:%t-ThreeFunctionsTestDF %t/IN/FUABC  | FileCheck %s --check-prefix=IN_FUABC
IN_FUABC: F0 11100{{$}}
IN_FUABC-NOT: F
IN_FUABC-NEXT: C0
IN_FUABC-NOT: C

# FUZZR: 5 bytes are used (4 in one function, 5-th in the other), Func2 is not called.
RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZR  | FileCheck %s --check-prefix=IN_FUZZR
IN_FUZZR: F0 11110
IN_FUZZR: F1 00001
IN_FUZZR-NOT: F
IN_FUZZR: C0
IN_FUZZR: C1
IN_FUZZR-NOT: C

# FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size.
RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZM  | FileCheck %s --check-prefix=IN_FUZZM
IN_FUZZM: F0 11110
IN_FUZZM: F1 00001
IN_FUZZM-NOT: F2
IN_FUZZM: C0
IN_FUZZM: C1
IN_FUZZM: C2

# FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size.
RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZMU  | FileCheck %s --check-prefix=IN_FUZZMU


# Test Labels20TestDF
RUN:%t-Labels20TestDF %t/IN20/FUxxxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FU
L20_FU: F0 11100000000000000000{{$}}
L20_FU-NOT: F

RUN:%t-Labels20TestDF %t/IN20/FUZxxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FUZ
L20_FUZ: F0 11110000000000000000{{$}}
L20_FUZ-NOT: F

RUN:%t-Labels20TestDF %t/IN20/FUZZxxxxxxxxxxxxxxxx | FileCheck %s --check-prefix=L20_FUZZ
L20_FUZZ: F0 11110000000000000000{{$}}
L20_FUZZ-NEXT: F1 00000000000000001000{{$}}
L20_FUZZ-NOT: F

RUN:%t-Labels20TestDF %t/IN20/FUZZxxxxxxxxxxxxMxxx | FileCheck %s --check-prefix=L20_FUZZM
L20_FUZZM: F0 11110000000000000000{{$}}
L20_FUZZM-NEXT: F1 00000000000000001000{{$}}
L20_FUZZM-NEXT: F2 00000000000000000001{{$}}
L20_FUZZM-NOT: F

# Test libFuzzer's built in DFT collection.
RUN: rm -rf %t-DFT
RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t-DFT %t/IN/FUZZMU
RUN: cat %t-DFT/* | sort | FileCheck %s --check-prefix=IN_FUZZMU

IN_FUZZMU: F0 111100
IN_FUZZMU: F1 000010
IN_FUZZMU: F2 000001

# Test that we can run collect_data_flow on the entire corpus dir
RUN: rm -rf %t/OUT
RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN
RUN: %t-ThreeFunctionsTest -data_flow_trace=%t/OUT -runs=0 -focus_function=Func2 %t/IN 2>&1 | FileCheck %s --check-prefix=USE_DATA_FLOW_TRACE

USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: reading from {{.*}}/OUT
USE_DATA_FLOW_TRACE: d28cb407e8e1a702c72d25473f0553d3ec172262 => |000001|
USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 1 traces with focus function
USE_DATA_FLOW_TRACE: INFO: Focus function is set to 'Func2'
USE_DATA_FLOW_TRACE: INITED
USE_DATA_FLOW_TRACE: INFO: 2/6 inputs touch the focus function
USE_DATA_FLOW_TRACE: INFO: 1/2 inputs have the Data Flow Trace

# Test that we can run collect_data_flow on a long input (>2**16 bytes)
RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input
RUN: rm -rf %t/OUT
RUN: %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN/very_long_input
RUN: rm %t/IN/very_long_input

# Test that it fails explicitly when an empty corpus is provided.
RUN: rm -rf %t/IN && mkdir %t/IN
RUN: not %t-ThreeFunctionsTest  -collect_data_flow=%t-ThreeFunctionsTestDF -data_flow_trace=%t/OUT %t/IN 2>&1 | FileCheck %s --check-prefix=EMPTY_CORPUS

EMPTY_CORPUS: ERROR: can't collect data flow without corpus provided