@@ -8750,8 +8750,13 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8750
8750
return SDValue();
8751
8751
8752
8752
int NumVecElts = VTy.getVectorNumElements();
8753
- if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
8754
- return SDValue();
8753
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
8754
+ if (NumVecElts != 4)
8755
+ return SDValue();
8756
+ } else {
8757
+ if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
8758
+ return SDValue();
8759
+ }
8755
8760
8756
8761
int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
8757
8762
SDValue PreOp = OpV;
@@ -8802,6 +8807,8 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8802
8807
PreOp = CurOp;
8803
8808
}
8804
8809
unsigned Opcode;
8810
+ bool IsIntrinsic = false;
8811
+
8805
8812
switch (Op) {
8806
8813
default:
8807
8814
llvm_unreachable("Unexpected operator for across vector reduction");
@@ -8820,11 +8827,24 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8820
8827
case ISD::UMIN:
8821
8828
Opcode = AArch64ISD::UMINV;
8822
8829
break;
8830
+ case ISD::FMAXNUM:
8831
+ Opcode = Intrinsic::aarch64_neon_fmaxnmv;
8832
+ IsIntrinsic = true;
8833
+ break;
8834
+ case ISD::FMINNUM:
8835
+ Opcode = Intrinsic::aarch64_neon_fminnmv;
8836
+ IsIntrinsic = true;
8837
+ break;
8823
8838
}
8824
8839
SDLoc DL(N);
8825
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
8826
- DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
8827
- DAG.getConstant(0, DL, MVT::i64));
8840
+
8841
+ return IsIntrinsic
8842
+ ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
8843
+ DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
8844
+ : DAG.getNode(
8845
+ ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
8846
+ DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
8847
+ DAG.getConstant(0, DL, MVT::i64));
8828
8848
}
8829
8849
8830
8850
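/// Target-specific DAG combine for across-vector min/max reductions.
/// Integer SMAX/UMAX/SMIN/UMIN reductions over i8/i16/i32 elements are
/// matched, as are FMAXNUM/FMINNUM reductions (lowered to FMAXNMV/FMINNMV),
/// the latter only for 4-element f32 vectors. No other floating-point
/// reductions are matched here.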
@@ -8848,9 +8868,6 @@ static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
8848
8868
/// becomes:
8849
8869
/// %1 = smaxv %0
8850
8870
/// %result = extract_vector_elt %1, 0
8851
- /// FIXME: Currently this function matches only SMAXV, UMAXV, SMINV, and UMINV.
8852
- /// We could also support other types of across lane reduction available
8853
- /// in AArch64, including FMAXNMV, FMAXV, FMINNMV, and FMINV.
8854
8871
static SDValue
8855
8872
performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
8856
8873
const AArch64Subtarget *Subtarget) {
@@ -8878,17 +8895,26 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
8878
8895
SDValue VectorOp = SetCC.getOperand(0);
8879
8896
unsigned Op = VectorOp->getOpcode();
8880
8897
// Check if the input vector is fed by the operator we want to handle.
8881
- if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN && Op != ISD::UMIN)
8898
+ if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
8899
+ Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
8882
8900
return SDValue();
8883
8901
8884
8902
EVT VTy = VectorOp.getValueType();
8885
8903
if (!VTy.isVector())
8886
8904
return SDValue();
8887
8905
8888
- EVT EltTy = VTy.getVectorElementType();
8889
- if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8906
+ if (VTy.getSizeInBits() < 64)
8890
8907
return SDValue();
8891
8908
8909
+ EVT EltTy = VTy.getVectorElementType();
8910
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
8911
+ if (EltTy != MVT::f32)
8912
+ return SDValue();
8913
+ } else {
8914
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8915
+ return SDValue();
8916
+ }
8917
+
8892
8918
// Check if extracting from the same vector.
8893
8919
// For example,
8894
8920
// %sc = setcc %vector, %svn1, gt
@@ -8904,7 +8930,13 @@ performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
8904
8930
if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
8905
8931
(Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
8906
8932
(Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
8907
- (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE))
8933
+ (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
8934
+ (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
8935
+ CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
8936
+ CC != ISD::SETGE) ||
8937
+ (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
8938
+ CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
8939
+ CC != ISD::SETLE))
8908
8940
return SDValue();
8909
8941
8910
8942
// Expect to check only lane 0 from the vector SETCC.
@@ -8963,6 +8995,9 @@ performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
8963
8995
if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
8964
8996
return SDValue();
8965
8997
8998
+ if (VTy.getSizeInBits() < 64)
8999
+ return SDValue();
9000
+
8966
9001
return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
8967
9002
}
8968
9003
0 commit comments