Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20590,7 +20590,10 @@ bool GenTree::isEmbeddedMaskingCompatible() const
// Return Value:
// true if the node's lowering instruction has EVEX embedded masking support
//
bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize, CorInfoType& tgtSimdBaseJitType) const
bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp,
unsigned tgtMaskSize,
CorInfoType& tgtSimdBaseJitType,
size_t* broadcastOpIndex /* = nullptr */) const
{
if (!isEmbeddedMaskingCompatible())
{
Expand Down Expand Up @@ -20660,9 +20663,17 @@ bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize,

if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(2)))
{
// We cannot change the base type if we've already contained a broadcast
// If we haven't contained a broadcast, we can change the base type freely
supportsMaskBaseSize2Or4 = true;
}
else if (maskBaseSize == 4)
{
assert(broadcastOpIndex != nullptr);

// If the contained broadcast is 4 bytes, we can change it to 8 bytes
supportsMaskBaseSize2Or4 = true;
*broadcastOpIndex = 2;
}
break;
}

Expand All @@ -20674,8 +20685,16 @@ bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize,

if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(3)))
{
// We cannot change the base type if we've already contained a broadcast
// If we haven't contained a broadcast, we can change the base type freely
supportsMaskBaseSize2Or4 = true;
}
else if (maskBaseSize == 4)
{
assert(broadcastOpIndex != nullptr);

// If the contained broadcast is 4 bytes, we can change it to 8 bytes
supportsMaskBaseSize2Or4 = true;
*broadcastOpIndex = 3;
}
break;
}
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1521,7 +1521,10 @@ struct GenTree
#if defined(TARGET_XARCH)
bool isEvexCompatibleHWIntrinsic(Compiler* comp) const;
bool isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const;
bool isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize, CorInfoType& tgtSimdBaseJitType) const;
bool isEmbeddedMaskingCompatible(Compiler* comp,
unsigned tgtMaskSize,
CorInfoType& tgtSimdBaseJitType,
size_t* broadcastOpIndex = nullptr) const;
#endif // TARGET_XARCH
bool isEmbeddedMaskingCompatible() const;
#else
Expand Down
14 changes: 7 additions & 7 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,13 +477,13 @@ INST3(aeskeygenassist, "vaeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE,
INST3(pclmulqdq, "vpclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), 7C, 1C, INS_TT_FULL_MEM, KMask_Base1 | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Perform a carry-less multiplication of two quadwords

// Instructions for SHA
INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords
INST3(sha1msg2, "sha1msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCA), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA1 Message Dwords
INST3(sha1nexte, "sha1nexte", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC8), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Calculate SHA1 State Variable E After Four Rounds
INST3(sha1rnds4, "sha1rnds4", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform Four Rounds of SHA1 Operation
INST3(sha256msg1, "sha256msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords
INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCD), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA256 Message Dwords
INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform Two Rounds of SHA256 Operation
INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords
INST3(sha1msg2, "sha1msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform a Final Calculation for the Next Four SHA1 Message Dwords
INST3(sha1nexte, "sha1nexte", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Calculate SHA1 State Variable E After Four Rounds
INST3(sha1rnds4, "sha1rnds4", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform Four Rounds of SHA1 Operation
INST3(sha256msg1, "sha256msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords
INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform a Final Calculation for the Next Four SHA256 Message Dwords
INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform Two Rounds of SHA256 Operation

// Instructions for GFNI
INST3(gf2p8affineinvqb, "vgf2p8affineinvqb",IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation Inverse
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class Lowering final : public Phase
void ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* addr, unsigned size);
void ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node);
#ifdef TARGET_XARCH
void TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, GenTreeVecCon* childNode);
void TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, GenTreeVecCon* cnsVec);
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS

Expand Down
Loading
Loading