llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h

102 lines
3.8 KiB
C
Raw Normal View History

//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines hazard recognizers for scheduling on PowerPC processors.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
#define LLVM_LIB_TARGET_POWERPC_PPCHAZARDRECOGNIZERS_H
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
namespace llvm {
2010-12-24 12:28:06 +08:00
Improve instruction scheduling for the PPC POWER7 Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using AA information, to robustly detect the possibility of load-after-store hazards. significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative): speedups: MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168% MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598% MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058% MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391% SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104% SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061% SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458% SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234% SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496% regressions: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156% SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733% llvm-svn: 197099
2013-12-12 08:19:11 +08:00
/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
/// hazard recognizer for PPC ooo processors with dispatch-group hazards.
class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
const ScheduleDAG *DAG;
SmallVector<SUnit *, 7> CurGroup;
unsigned CurSlots, CurBranches;
bool isLoadAfterStore(SUnit *SU);
bool isBCTRAfterSet(SUnit *SU);
bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
public:
PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG_) :
ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
CurSlots(0), CurBranches(0) {}
HazardType getHazardType(SUnit *SU, int Stalls) override;
bool ShouldPreferAnother(SUnit* SU) override;
unsigned PreEmitNoops(SUnit *SU) override;
void EmitInstruction(SUnit *SU) override;
void AdvanceCycle() override;
void RecedeCycle() override;
void Reset() override;
void EmitNoop() override;
Improve instruction scheduling for the PPC POWER7 Aside from a few minor latency corrections, the major change here is a new hazard recognizer which focuses on better dispatch-group formation on the POWER7. As with the PPC970's hazard recognizer, the most important thing it does is avoid load-after-store hazards within the same dispatch group. It uses the POWER7's special dispatch-group-terminating nop instruction (instead of inserting multiple regular nop instructions). This new hazard recognizer makes use of the scheduling dependency graph itself, built using AA information, to robustly detect the possibility of load-after-store hazards. significant test-suite performance changes (the error bars are 99.5% confidence intervals based on 5 test-suite runs both with and without the change -- speedups are negative): speedups: MultiSource/Benchmarks/FreeBench/pcompress2/pcompress2 -0.55171% +/- 0.333168% MultiSource/Benchmarks/TSVC/CrossingThresholds-dbl/CrossingThresholds-dbl -17.5576% +/- 14.598% MultiSource/Benchmarks/TSVC/Reductions-dbl/Reductions-dbl -29.5708% +/- 7.09058% MultiSource/Benchmarks/TSVC/Reductions-flt/Reductions-flt -34.9471% +/- 11.4391% SingleSource/Benchmarks/BenchmarkGame/puzzle -25.1347% +/- 11.0104% SingleSource/Benchmarks/Misc/flops-8 -17.7297% +/- 9.79061% SingleSource/Benchmarks/Shootout-C++/ary3 -35.5018% +/- 23.9458% SingleSource/Regression/C/uint64_to_float -56.3165% +/- 25.4234% SingleSource/UnitTests/Vectorizer/gcc-loops -18.5309% +/- 6.8496% regressions: MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 18.351% +/- 12.156% SingleSource/Benchmarks/Shootout-C++/methcall 27.3086% +/- 14.4733% llvm-svn: 197099
2013-12-12 08:19:11 +08:00
};
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
/// avoid structural hazards that cause significant performance penalties (e.g.
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
const ScheduleDAG &DAG;
2010-12-24 12:28:06 +08:00
unsigned NumIssued; // Number of insts issued, including advanced cycles.
2010-12-24 12:28:06 +08:00
// Various things that can cause a structural hazard.
2010-12-24 12:28:06 +08:00
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
2010-12-24 12:28:06 +08:00
// StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the store. We can have
// up to four stores in one dispatch group, hence we track up to 4.
//
// This is null if we haven't seen a store yet. We keep track of both
// operands of the store here, since we support [r+r] and [r+i] addressing.
const Value *StoreValue[4];
int64_t StoreOffset[4];
uint64_t StoreSize[4];
unsigned NumStores;
2010-12-24 12:28:06 +08:00
public:
PPCHazardRecognizer970(const ScheduleDAG &DAG);
HazardType getHazardType(SUnit *SU, int Stalls) override;
void EmitInstruction(SUnit *SU) override;
void AdvanceCycle() override;
void Reset() override;
2010-12-24 12:28:06 +08:00
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
///
void EndDispatchGroup();
2010-12-24 12:28:06 +08:00
/// GetInstrType - Classify the specified powerpc opcode according to its
/// pipeline.
PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
2010-12-24 12:28:06 +08:00
bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
const Value *LoadValue) const;
};
} // end namespace llvm
#endif