Skip to content

Commit

Permalink
[X86-64] Add support for unpcklpd/unpcklps instructions
Browse files Browse the repository at this point in the history
[X86-64] Add support for pshufd
[Test] Test to verify added support
  • Loading branch information
martin-fink authored and bharadwajy committed Feb 28, 2022
1 parent 3c5ced5 commit ea33269
Show file tree
Hide file tree
Showing 4 changed files with 338 additions and 12 deletions.
12 changes: 6 additions & 6 deletions X86/X86AdditionalInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2052,8 +2052,8 @@ static constexpr const_addl_instr_info::value_type mapdata[] = {
{X86::PSADBWrr, {0, Unknown}},
{X86::PSHUFBrm, {0, Unknown}},
{X86::PSHUFBrr, {0, Unknown}},
{X86::PSHUFDmi, {0, Unknown}},
{X86::PSHUFDri, {0, Unknown}},
{X86::PSHUFDmi, {16, BINARY_OP_RM}},
{X86::PSHUFDri, {0, BINARY_OP_WITH_IMM}},
{X86::PSHUFHWmi, {0, Unknown}},
{X86::PSHUFHWri, {0, Unknown}},
{X86::PSHUFLWmi, {0, Unknown}},
Expand Down Expand Up @@ -2768,10 +2768,10 @@ static constexpr const_addl_instr_info::value_type mapdata[] = {
{X86::UNPCKHPDrr, {0, Unknown}},
{X86::UNPCKHPSrm, {0, Unknown}},
{X86::UNPCKHPSrr, {0, Unknown}},
{X86::UNPCKLPDrm, {0, Unknown}},
{X86::UNPCKLPDrr, {0, Unknown}},
{X86::UNPCKLPSrm, {0, Unknown}},
{X86::UNPCKLPSrr, {0, Unknown}},
{X86::UNPCKLPDrm, {16, BINARY_OP_RM}},
{X86::UNPCKLPDrr, {0, BINARY_OP_RR}},
{X86::UNPCKLPSrm, {16, BINARY_OP_RM}},
{X86::UNPCKLPSrr, {0, BINARY_OP_RR}},
{X86::VAARG_64, {0, Unknown}},
{X86::VADDPDYrm, {0, Unknown}},
{X86::VADDPDYrr, {0, Unknown}},
Expand Down
163 changes: 157 additions & 6 deletions X86/X86MachineInstructionRaiser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1726,6 +1726,55 @@ bool X86MachineInstructionRaiser::raiseBinaryOpRegToRegMachineInstr(
// Update the value of dstReg
raisedValues->setPhysRegSSAValue(dstReg, MBBNo, Result);
} break;
// Raise unpcklpd / unpcklps (register-register form): build a vector whose
// lanes alternate between the low-half lanes of the two source operands
// (Src1[0], Src2[0], Src1[1], Src2[1], ...).
case X86::UNPCKLPDrr:
case X86::UNPCKLPSrr: {
Value *Src1Value = ExplicitSrcValues.at(0);
Value *Src2Value = ExplicitSrcValues.at(1);
// Verify the def operand is a register.
assert(MI.getOperand(DestOpIndex).isReg() &&
"Expecting destination of sse op instruction to be a register "
"operand");
assert((MCID.getNumDefs() == 1) &&
"Unexpected number of defines in sse op instruction");
assert((Src1Value != nullptr) && (Src2Value != nullptr) &&
"Unhandled situation: register is used before initialization in "
"sse op");
dstReg = MI.getOperand(DestOpIndex).getReg();

// Lane width in bits: 64 for the PD variant, 32 for the PS variant.
unsigned int segmentSize;
if (MI.getOpcode() == X86::UNPCKLPDrr) {
segmentSize = 64;
} else {
segmentSize = 32;
}

// View the operands as 128 / segmentSize integer lanes of segmentSize bits.
LLVMContext &Ctx(MF.getFunction().getContext());
FixedVectorType *VecTy = FixedVectorType::get(Type::getIntNTy(Ctx, segmentSize), 128 / segmentSize);

// NOTE(review): the bitcasts assume both source values are already 128 bits
// wide -- confirm against how ExplicitSrcValues is populated.
Src1Value = new BitCastInst(Src1Value, VecTy, "", RaisedBB);
Src2Value = new BitCastInst(Src2Value, VecTy, "", RaisedBB);

// Start from an all-zero vector and overwrite every lane in turn.
Value *Result = ConstantInt::get(VecTy, 0);
for (unsigned int i = 0; i < VecTy->getNumElements(); ++i) {
auto DstIndex = ConstantInt::get(VecTy->getElementType(), i);
// Destination lanes 2k and 2k+1 both read source lane k.
auto SrcIndex = ConstantInt::get(VecTy->getElementType(), i >> 1);

// Even destination lanes come from the first source, odd lanes from the
// second source.
Value *SrcVal;
if (i % 2 == 0) {
SrcVal = Src1Value;
} else {
SrcVal = Src2Value;
}

auto ExtractInst = ExtractElementInst::Create(SrcVal, SrcIndex, "", RaisedBB);
Result = InsertElementInst::Create(Result, ExtractInst, DstIndex, "", RaisedBB);
}

// Copy any necessary rodata related metadata
raisedValues->setInstMetadataRODataIndex(Src1Value, (Instruction *) Result);
// Update the value of dstReg
raisedValues->setPhysRegSSAValue(dstReg, MBBNo, Result);
} break;
default:
MI.dump();
assert(false && "Unhandled binary instruction");
Expand Down Expand Up @@ -2140,6 +2189,82 @@ bool X86MachineInstructionRaiser::raiseBinaryOpMemToRegInstr(

BinOpInst = (Instruction *)Result;
} break;
// Raise unpcklpd / unpcklps (register-memory form): build a vector whose
// lanes alternate between the low-half lanes of DestValue and LoadValue
// (Dest[0], Load[0], Dest[1], Load[1], ...).
case X86::UNPCKLPDrm:
case X86::UNPCKLPSrm: {
assert(DestValue != nullptr && "Encountered instruction with undefined register");

// Lane width in bits: 64 for the PD variant, 32 for the PS variant.
unsigned int segmentSize;
if (MI.getOpcode() == X86::UNPCKLPDrm) {
segmentSize = 64;
} else {
segmentSize = 32;
}

// View the operands as 128 / segmentSize integer lanes of segmentSize bits.
LLVMContext &Ctx(MF.getFunction().getContext());
FixedVectorType *VecTy = FixedVectorType::get(Type::getIntNTy(Ctx, segmentSize), 128 / segmentSize);

// NOTE(review): the bitcasts assume DestValue and LoadValue are both 128 bits
// wide -- confirm against the code that produces them earlier in the function.
Value *Src1Value = new BitCastInst(DestValue, VecTy, "", RaisedBB);
Value *Src2Value = new BitCastInst(LoadValue, VecTy, "", RaisedBB);

// Start from an all-zero vector and overwrite every lane in turn.
Value *Result = ConstantInt::get(VecTy, 0);
for (unsigned int i = 0; i < VecTy->getNumElements(); ++i) {
auto DstIndex = ConstantInt::get(VecTy->getElementType(), i);
// Destination lanes 2k and 2k+1 both read source lane k.
auto SrcIndex = ConstantInt::get(VecTy->getElementType(), i >> 1);

// Even destination lanes come from the register operand, odd lanes from the
// memory operand.
Value *SrcVal;
if (i % 2 == 0) {
SrcVal = Src1Value;
} else {
SrcVal = Src2Value;
}

auto ExtractInst = ExtractElementInst::Create(SrcVal, SrcIndex, "", RaisedBB);
// Don't insert the last instruction into RaisedBB here; that will be done
// after the switch statement.
if (i != VecTy->getNumElements() - 1) {
Result = InsertElementInst::Create(Result, ExtractInst, DstIndex, "", RaisedBB);
} else {
Result = InsertElementInst::Create(Result, ExtractInst, DstIndex);
}
}

// Result is the final (not yet inserted) InsertElementInst.
BinOpInst = (Instruction *) Result;
} break;
// Raise pshufd (memory-source form): every 32-bit lane of the result is the
// lane of the loaded 128-bit value selected by the matching 2-bit field of
// the 8-bit immediate (bits [2*i+1 : 2*i] pick the source lane for lane i).
case X86::PSHUFDmi: {
  // Get index of memory reference in the instruction.
  int MemoryRefOpIndex = getMemoryRefOpIndex(MI);
  // The index of the memory reference operand should be 1
  assert(MemoryRefOpIndex == 1 &&
         "Unexpected memory reference operand index in pshufd instruction");
  // The immediate follows the operands that make up the memory reference.
  const MachineOperand &SecondSourceOp =
      MI.getOperand(MemoryRefOpIndex + X86::AddrNumOperands);
  // Second source should be an immediate.
  assert(SecondSourceOp.isImm() &&
         "Expect immediate operand in pshufd instruction");

  // View the loaded value as <4 x i32>.
  LLVMContext &Ctx(MF.getFunction().getContext());
  FixedVectorType *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  Value *SrcOpValue =
      getRaisedValues()->reinterpretSSERegValue(LoadValue, VecTy, RaisedBB);

  uint8_t ImmValue = (uint8_t)SecondSourceOp.getImm();
  // Start from an all-zero vector and overwrite every lane in turn.
  Value *Result = ConstantInt::get(VecTy, 0);
  for (unsigned i = 0; i < VecTy->getNumElements(); ++i) {
    auto DstIndex = ConstantInt::get(Type::getInt32Ty(Ctx), i);
    // Two immediate bits per destination lane select the source lane.
    auto SrcIndex =
        ConstantInt::get(Type::getInt32Ty(Ctx), ((ImmValue >> (2 * i)) & 0b11));

    auto ExtractInst =
        ExtractElementInst::Create(SrcOpValue, SrcIndex, "", RaisedBB);
    // The final insertelement is created without an insertion point.
    // NOTE(review): presumably it is inserted by the code following this
    // switch, as for the other cases that assign BinOpInst -- confirm.
    if (i != VecTy->getNumElements() - 1) {
      Result = InsertElementInst::Create(Result, ExtractInst, DstIndex, "",
                                         RaisedBB);
    } else {
      Result = InsertElementInst::Create(Result, ExtractInst, DstIndex);
    }
  }

  BinOpInst = dyn_cast<Instruction>(Result);
} break;
default:
assert(false && "Unhandled binary op mem to reg instruction ");
}
Expand Down Expand Up @@ -3915,21 +4040,19 @@ bool X86MachineInstructionRaiser::raiseBinaryOpImmToRegMachineInstr(
Value *SrcOp2Value = nullptr;
unsigned int DstPReg = X86::NoRegister;

// Ensure that the instruction defines EFLAGS as implicit define register.
assert(MIDesc.hasImplicitDefOfPhysReg(X86::EFLAGS) &&
"Expected implicit def operand EFLAGS not found");

// A vector holding source operand values.
SmallVector<Value *, 2> OpValues = {nullptr, nullptr};
unsigned NumImplicitDefs = MIDesc.getNumImplicitDefs();
assert(((NumImplicitDefs == 1) || (NumImplicitDefs == 2)) &&
assert((NumImplicitDefs == 0 || NumImplicitDefs == 1 ||
NumImplicitDefs == 2) &&
"Encountered instruction unexpected number of implicit defs");
// Index of the instruction operand being read.
unsigned CurExplicitOpIndex = 0;
// Keep a count of the number of instruction operands evaluated. A count of
// NumOperands need to be evaluated. The count starts at 1 when EFLAGS is an
// implicit def (that operand is already accounted for) and at 0 otherwise.
unsigned NumOperandsEval = 1;
unsigned NumOperandsEval =
MIDesc.hasImplicitDefOfPhysReg(X86::EFLAGS) ? 1 : 0;
// Find destination register of the instruction
// If the instruction has an explicit dest operand, get the DstPreg from
// dest operand.
Expand Down Expand Up @@ -4282,6 +4405,34 @@ bool X86MachineInstructionRaiser::raiseBinaryOpImmToRegMachineInstr(
AffectedEFlags.insert(EFLAGS::ZF);
AffectedEFlags.insert(EFLAGS::PF);
break;
// Raise pshufd (register-source form): every 32-bit lane of the result is the
// lane of SrcOp1Value selected by the matching 2-bit field of the 8-bit
// immediate (bits [2*i+1 : 2*i] pick the source lane for lane i).
case X86::PSHUFDri: {
// The shuffle control must be a compile-time constant.
ConstantInt *Imm = dyn_cast<ConstantInt>(SrcOp2Value);
assert(Imm && "Expected immediate for pshufd to be defined");

// View the source value as <4 x i32>.
LLVMContext &Ctx(MF.getFunction().getContext());
FixedVectorType *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
SrcOp1Value = getRaisedValues()->reinterpretSSERegValue(SrcOp1Value,
VecTy, RaisedBB);

uint8_t ImmValue = (uint8_t)Imm->getZExtValue();
// Start from an all-zero vector and overwrite every lane in turn.
Value *Result = ConstantInt::get(VecTy, 0);
for (unsigned i = 0; i < VecTy->getNumElements(); ++i) {
auto DstIndex = ConstantInt::get(Type::getInt32Ty(Ctx), i);
// Two immediate bits per destination lane select the source lane
// (>> binds tighter than &, so this is (ImmValue >> 2*i) & 3).
auto SrcIndex = ConstantInt::get(Type::getInt32Ty(Ctx),
(ImmValue >> (2 * i) & 0b11));

auto ExtractInst =
ExtractElementInst::Create(SrcOp1Value, SrcIndex, "", RaisedBB);
// The final insertelement is created without an insertion point.
// NOTE(review): presumably the code after this switch inserts BinOpInstr
// into the block -- confirm.
if (i != VecTy->getNumElements() - 1) {
Result = InsertElementInst::Create(Result, ExtractInst, DstIndex, "",
RaisedBB);
} else {
Result = InsertElementInst::Create(Result, ExtractInst, DstIndex);
}
}

BinOpInstr = dyn_cast<Instruction>(Result);
} break;
default:
LLVM_DEBUG(MI.dump());
assert(false && "Unhandled reg to imm binary operator instruction");
Expand Down
98 changes: 98 additions & 0 deletions test/asm_test/X86/raise-pshufd.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// REQUIRES: x86_64-linux
// RUN: clang -O0 -o %t %s
// RUN: llvm-mctoll -d -I /usr/include/stdio.h %t
// RUN: clang -o %t-dis %t-dis.ll
// RUN: %t-dis 2>&1 | FileCheck %s
// CHECK: 0xaaaaaaaabbbbbbbbccccccccdddddddd
// CHECK-NEXT: 0xddddddddccccccccbbbbbbbbaaaaaaaa
// CHECK-NEXT: 0xaaaaaaaabbbbbbbbaaaaaaaabbbbbbbb
// CHECK-NEXT: 0xaaaaaaaabbbbbbbbccccccccdddddddd
// CHECK-NEXT: 0xddddddddccccccccbbbbbbbbaaaaaaaa
// CHECK-NEXT: 0xaaaaaaaabbbbbbbbaaaaaaaabbbbbbbb
// CHECK-EMPTY:

# Code section, Intel operand order without % register prefixes.
.text
.intel_syntax noprefix
.file "raise-pshufd.s"

# main: runs pshufd six times -- three immediates with a register source
# (xmm1) and the same three immediates with a memory source (.L.val.1) --
# and prints each 128-bit result with printf as two 64-bit hex values.
# Before every shuffle xmm0 is loaded with all-ones from .L.val; pshufd
# overwrites all of xmm0, so none of those bits should reach the output.
# NOTE(review): rsp is 8 mod 16 on entry to main, so after `sub rsp, 16` the
# printf calls are made with a stack that is not 16-byte aligned -- confirm
# this is acceptable for the test environment.
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
# Reserve 16 bytes of scratch space used to spill xmm0 for printing.
sub rsp, 16

# Case 1 (register source): 0xe4 maps dword i to dword i, so the result
# equals the source value.
movdqa xmm0, [.L.val]
movdqa xmm1, [.L.val.1]
pshufd xmm0, xmm1, 0xe4 # = 11 10 01 00
# Spill the result; rsi gets its low 8 bytes and rdx its high 8 bytes, which
# is the order the format string prints them in.
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
# al = 0: no vector registers carry arguments to the variadic printf.
mov al, 0
call printf

# Case 2 (register source): 0x1b reverses the order of the four dwords.
movdqa xmm0, [.L.val]
movdqa xmm1, [.L.val.1]
pshufd xmm0, xmm1, 0x1b # = 00 01 10 11
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
mov al, 0
call printf

# Case 3 (register source): 0x44 copies source dwords 0 and 1 into both
# halves of the result.
movdqa xmm0, [.L.val]
movdqa xmm1, [.L.val.1]
pshufd xmm0, xmm1, 0x44 # = 01 00 01 00
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
mov al, 0
call printf

# Case 4 (memory source): same shuffle as case 1, reading .L.val.1 directly.
movdqa xmm0, [.L.val]
pshufd xmm0, [.L.val.1], 0xe4 # = 11 10 01 00
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
mov al, 0
call printf

# Case 5 (memory source): same shuffle as case 2.
movdqa xmm0, [.L.val]
pshufd xmm0, [.L.val.1], 0x1b # = 00 01 10 11
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
mov al, 0
call printf

# Case 6 (memory source): same shuffle as case 3.
movdqa xmm0, [.L.val]
pshufd xmm0, [.L.val.1], 0x44 # = 01 00 01 00
movdqu [rsp], xmm0
mov rsi, [rsp]
mov rdx, [rsp + 8]
movabs rdi, offset .L.str
mov al, 0
call printf

# Release the scratch space and return 0.
add rsp, 16
xor rax, rax
ret

# printf format: "0x" followed by two zero-padded 16-digit hex values.
.type .L.str,@object # @.str
.section .rodata.str1.1,"aMS",@progbits,1
.L.str:
.asciz "0x%016llx%016llx\n"
# 17 characters plus the terminating NUL emitted by .asciz.
.size .L.str, 18

# 16-byte constants, aligned to 16 bytes because they are accessed with
# movdqa and as a pshufd memory operand.
.section .rodata.cst16,"aM",@progbits,16
.align 16
# All-ones filler loaded into xmm0 before each shuffle.
.L.val:
.quad 0xffffffffffffffff
.quad 0xffffffffffffffff
# Shuffle source; as little-endian dwords 0..3 it is
# 0xbbbbbbbb, 0xaaaaaaaa, 0xdddddddd, 0xcccccccc.
.L.val.1:
.quad 0xaaaaaaaabbbbbbbb
.quad 0xccccccccdddddddd
Loading

0 comments on commit ea33269

Please sign in to comment.