29#define DEBUG_TYPE "hexagontti"
36 cl::desc(
"Allow auto-generation of HVX scatter-gather"));
40 cl::desc(
"Enable auto-vectorization of floatint point types on v68."));
44 cl::desc(
"Control lookup table emission on Hexagon target"));
54bool HexagonTTIImpl::useHVX()
const {
58bool HexagonTTIImpl::isHVXVectorType(
Type *Ty)
const {
62 if (!ST.isTypeForHVX(VecTy))
64 if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
69unsigned HexagonTTIImpl::getTypeNumElements(
Type *Ty)
const {
71 return VTy->getNumElements();
73 "Expecting scalar type");
95 if (L && L->isInnermost() &&
canPeel(L) &&
112 bool Vector = ClassID == 1;
114 return useHVX() ? 32 : 0;
119 return useHVX() ? 2 : 1;
137 return useHVX() ? ST.getVectorLength()*8 : 32;
141 bool IsScalable)
const {
142 assert(!IsScalable &&
"Scalable VFs are not supported for Hexagon");
155 if (ICA.
getID() == Intrinsic::bswap) {
156 std::pair<InstructionCost, MVT> LT =
176 assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
181 if (Opcode == Instruction::Store)
185 if (Src->isVectorTy()) {
188 if (isHVXVectorType(VecTy)) {
192 assert(RegWidth &&
"Non-zero vector register width expected");
194 if (VecWidth % RegWidth == 0)
195 return VecWidth / RegWidth;
197 const Align RegAlign(RegWidth / 8);
198 if (Alignment > RegAlign)
199 Alignment = RegAlign;
200 unsigned AlignWidth = 8 * Alignment.
value();
201 unsigned NumLoads =
alignTo(VecWidth, AlignWidth) / AlignWidth;
211 const Align BoundAlignment = std::min(Alignment,
Align(8));
212 unsigned AlignWidth = 8 * BoundAlignment.
value();
213 unsigned NumLoads =
alignTo(VecWidth, AlignWidth) / AlignWidth;
214 if (Alignment ==
Align(4) || Alignment ==
Align(8))
215 return Cost * NumLoads;
218 unsigned LogA =
Log2(BoundAlignment);
219 return (3 - LogA) *
Cost * NumLoads;
244 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
253 bool UseMaskForCond,
bool UseMaskForGaps)
const {
254 if (Indices.
size() != Factor || UseMaskForCond || UseMaskForGaps)
258 UseMaskForCond, UseMaskForGaps);
267 if (!isHVXVectorType(ValTy) && ValTy->isFPOrFPVectorTy())
270 if (Opcode == Instruction::FCmp)
271 return LT.first +
FloatFactor * getTypeNumElements(ValTy);
274 Op1Info, Op2Info,
I);
284 Op2Info, Args, CxtI);
286 if (Ty->isVectorTy()) {
287 if (!isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy())
290 if (LT.second.isFloatingPoint())
291 return LT.first +
FloatFactor * getTypeNumElements(Ty);
302 auto isNonHVXFP = [
this] (
Type *Ty) {
303 return Ty->isVectorTy() && !isHVXVectorType(Ty) && Ty->isFPOrFPVectorTy();
305 if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
309 unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
315 std::max(SrcLT.first, DstLT.first) +
FloatFactor * (SrcN + DstN);
318 return Cost == 0 ? 0 : 1;
328 const Value *Op1)
const {
331 if (Opcode == Instruction::InsertElement) {
333 unsigned Cost = (Index != 0) ? 2 : 0;
334 if (ElemTy->isIntegerTy(32))
341 if (Opcode == Instruction::ExtractElement)
363 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
367 switch (Ty->getScalarSizeInBits()) {
369 return (getTypeNumElements(Ty) == 128);
371 if (getTypeNumElements(Ty) == 64 || getTypeNumElements(Ty) == 32)
372 return (Alignment >= 2);
375 if (getTypeNumElements(Ty) == 32)
376 return (Alignment >= 4);
385 if (!Ty->isVectorTy() || !ST.isTypeForHVX(Ty) ||
389 switch (Ty->getScalarSizeInBits()) {
391 return (getTypeNumElements(Ty) == 128);
393 if (getTypeNumElements(Ty) == 64)
394 return (Alignment >= 2);
397 if (getTypeNumElements(Ty) == 32)
398 return (Alignment >= 4);
407 Align Alignment)
const {
412 Align Alignment)
const {
419 return ST.getL1PrefetchDistance();
423 return ST.getL1CacheLineSize();
430 auto isCastFoldedIntoLoad = [
this](
const CastInst *CI) ->
bool {
431 if (!CI->isIntegerCast())
436 unsigned SBW =
DL.getTypeSizeInBits(CI->getSrcTy());
437 unsigned DBW =
DL.getTypeSizeInBits(CI->getDestTy());
438 if (DBW != 32 || SBW >= DBW)
449 if (isCastFoldedIntoLoad(CI))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
This is the base class for all instructions that perform data casts.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
A parsed version of the target data layout string, and methods for querying it.
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) const override
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const override
bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
— Vector TTI begin —
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *S, TTI::TargetCostKind CostKind) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override
unsigned getMinVectorRegisterBitWidth() const override
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
Compute a cost of the given call instruction.
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
Bias LSR towards creating post-increment opportunities.
bool shouldBuildLookupTables() const override
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
unsigned getCacheLineSize() const override
unsigned getPrefetchDistance() const override
— Vector TTI end —
static InstructionCost getMax()
Type * getReturnType() const
Intrinsic::ID getID() const
An instruction for reading from memory.
Represents a single loop in the control flow graph.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI unsigned getSmallConstantTripCount(const Loop *L)
Returns the exact trip count of the loop if we can compute it, and the result is a small constant.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
LLVM Value Representation.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Base class of all SIMD vector types.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
bool canPeel(const Loop *L)
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2(Align A)
Returns the log2 of the alignment.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.