Skip to content

Instantly share code, notes, and snippets.

@FioraAeterna
Created December 15, 2014 03:52
Show Gist options
  • Select an option

  • Save FioraAeterna/2caa0fecbd29b2137c23 to your computer and use it in GitHub Desktop.

Select an option

Save FioraAeterna/2caa0fecbd29b2137c23 to your computer and use it in GitHub Desktop.
Dolphin: use memory operands wherever possible
From 7df6919fc55d0f54ef9e8b87540a5b41718e7dc7 Mon Sep 17 00:00:00 2001
From: Fiora <[email protected]>
Date: Mon, 27 Oct 2014 04:04:00 -0700
Subject: [PATCH] JIT: use memory destination operands wherever possible
Avoids lots of redundant instructions in the case of destination
operands that will never be used again in the block.
---
Source/Core/Core/PowerPC/Jit64/Jit.h | 1 +
Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 100 +++++++++++++++----------
2 files changed, 60 insertions(+), 41 deletions(-)
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index c583d83..d8b3ff8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -123,6 +123,7 @@ public:
// Use to extract bytes from a register using the regcache. offset is in bytes.
Gen::OpArg ExtractFromReg(int reg, int offset);
void AndWithMask(Gen::X64Reg reg, u32 mask);
+ void BindRMOutput(int output, int input, bool force_bind = false, bool do_load = true);
bool CheckMergedBranch(int crf);
void DoMergedBranch();
void DoMergedBranchCondition();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 3574fa4..507b982 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -195,6 +195,24 @@ void Jit64::AndWithMask(X64Reg reg, u32 mask)
AND(32, R(reg), Imm32(mask));
}
+// Prepares `output` for an instruction that supports a memory destination operand: `output` is bound to a
+// register only if its bit is set in js.op->gprInUse or force_bind is true; otherwise `input` is bound instead.
+void Jit64::BindRMOutput(int output, int input, bool force_bind, bool do_load)
+{
+ if ((js.op->gprInUse & (1 << output)) || force_bind) // output is flagged in gprInUse, or the caller insists
+ {
+ gpr.BindToRegister(output, do_load); // do_load is forwarded; presumably false skips loading the old value - confirm
+ }
+ else
+ {
+ if (output != input) // when both are the same guest register there is no separate source to bind
+ gpr.BindToRegister(input, true, false); // NOTE(review): presumably (load = true, dirty = false) - confirm
+ if (!do_load) // the caller does not need output's previous value
+ gpr.DiscardRegContentsIfCached(output);
+ gpr.KillImmediate(output, do_load, true); // NOTE(review): presumably leaves output usable as a memory operand - confirm
+ }
+}
+
// Following static functions are used in conjunction with regimmop
static u32 Add(u32 a, u32 b)
{
@@ -230,7 +248,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
}
else if (a == d)
{
- gpr.BindToRegister(d, true);
+ BindRMOutput(d, d, Rc);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
}
else
@@ -280,7 +298,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
else if (inst.SIMM_16 == 0 && d != a && a != 0)
{
gpr.Lock(a, d);
- gpr.BindToRegister(d, false, true);
+ BindRMOutput(d, a, false, false);
MOV(32, gpr.R(d), gpr.R(a));
gpr.UnlockAll();
}
@@ -615,34 +633,18 @@ void Jit64::boolX(UGeckoInstruction inst)
{
if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */))
{
+ gpr.Lock(a, s);
+ BindRMOutput(a, s, inst.Rc, a == s);
if (a != s)
- {
- gpr.Lock(a,s);
- gpr.BindToRegister(a, false, true);
MOV(32, gpr.R(a), gpr.R(s));
- }
- else if (inst.Rc)
- {
- gpr.BindToRegister(a, true, false);
- }
needs_test = true;
}
else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */))
{
+ gpr.Lock(a, s);
+ BindRMOutput(a, s, inst.Rc || a != s, a == s);
if (a != s)
- {
- gpr.Lock(a,s);
- gpr.BindToRegister(a, false, true);
MOV(32, gpr.R(a), gpr.R(s));
- }
- else if (inst.Rc)
- {
- gpr.BindToRegister(a, true, true);
- }
- else
- {
- gpr.KillImmediate(a, true, true);
- }
NOT(32, gpr.R(a));
needs_test = true;
}
@@ -661,66 +663,73 @@ void Jit64::boolX(UGeckoInstruction inst)
}
else if ((a == s) || (a == b))
{
- gpr.Lock(a,((a == s) ? b : s));
- OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s));
- gpr.BindToRegister(a, true, true);
+ int operand = a == s ? b : s;
+ gpr.Lock(a, operand);
if (inst.SUBOP10 == 28) // andx
{
- AND(32, gpr.R(a), operand);
+ BindRMOutput(a, operand, inst.Rc);
+ AND(32, gpr.R(a), gpr.R(operand));
}
else if (inst.SUBOP10 == 476) // nandx
{
- AND(32, gpr.R(a), operand);
+ gpr.BindToRegister(a, true, true);
+ AND(32, gpr.R(a), gpr.R(operand));
NOT(32, gpr.R(a));
needs_test = true;
}
else if (inst.SUBOP10 == 60) // andcx
{
+ gpr.BindToRegister(a, true, true);
if (a == b)
{
NOT(32, gpr.R(a));
- AND(32, gpr.R(a), operand);
+ AND(32, gpr.R(a), gpr.R(operand));
}
else
{
- MOV(32, R(RSCRATCH), operand);
+ MOV(32, R(RSCRATCH), gpr.R(operand));
NOT(32, R(RSCRATCH));
AND(32, gpr.R(a), R(RSCRATCH));
}
}
else if (inst.SUBOP10 == 444) // orx
{
- OR(32, gpr.R(a), operand);
+ BindRMOutput(a, operand, inst.Rc);
+ OR(32, gpr.R(a), gpr.R(operand));
}
else if (inst.SUBOP10 == 124) // norx
{
- OR(32, gpr.R(a), operand);
+ gpr.BindToRegister(a, true, true);
+ OR(32, gpr.R(a), gpr.R(operand));
NOT(32, gpr.R(a));
needs_test = true;
}
else if (inst.SUBOP10 == 412) // orcx
{
+ gpr.BindToRegister(a, true, true);
if (a == b)
{
NOT(32, gpr.R(a));
- OR(32, gpr.R(a), operand);
+ OR(32, gpr.R(a), gpr.R(operand));
}
else
{
- MOV(32, R(RSCRATCH), operand);
+ MOV(32, R(RSCRATCH), gpr.R(operand));
NOT(32, R(RSCRATCH));
OR(32, gpr.R(a), R(RSCRATCH));
}
}
else if (inst.SUBOP10 == 316) // xorx
{
- XOR(32, gpr.R(a), operand);
+ BindRMOutput(a, operand, inst.Rc);
+ XOR(32, gpr.R(a), gpr.R(operand));
}
else if (inst.SUBOP10 == 284) // eqvx
{
+ gpr.BindToRegister(a, true, true);
NOT(32, gpr.R(a));
- XOR(32, gpr.R(a), operand);
+ XOR(32, gpr.R(a), gpr.R(operand));
}
else
{
@@ -869,19 +878,21 @@ void Jit64::subfx(UGeckoInstruction inst)
else
{
gpr.Lock(a, b, d);
- gpr.BindToRegister(d, (d == a || d == b), true);
if (d == b)
{
+ BindRMOutput(d, a, inst.Rc);
SUB(32, gpr.R(d), gpr.R(a));
}
else if (d == a)
{
+ gpr.BindToRegister(d, true, true);
MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
}
else
{
+ gpr.BindToRegister(d, false, true);
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), gpr.R(a));
}
@@ -1265,7 +1276,7 @@ void Jit64::addx(UGeckoInstruction inst)
{
int operand = ((d == a) ? b : a);
gpr.Lock(a, b, d);
- gpr.BindToRegister(d, true);
+ BindRMOutput(d, operand, inst.Rc);
ADD(32, gpr.R(d), gpr.R(operand));
if (inst.OE)
GenerateOverflow();
@@ -1363,17 +1374,18 @@ void Jit64::arithcx(UGeckoInstruction inst)
bool add = !!(inst.SUBOP10 & 2); // add or sub
int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d);
- gpr.BindToRegister(d, d == a || d == b, true);
if (d == a && d != b)
{
if (add)
{
+ BindRMOutput(d, b, inst.Rc);
ADD(32, gpr.R(d), gpr.R(b));
}
else
{
// special case, because sub isn't reversible
+ gpr.BindToRegister(d, true, true);
MOV(32, R(RSCRATCH), gpr.R(a));
MOV(32, gpr.R(d), gpr.R(b));
SUB(32, gpr.R(d), R(RSCRATCH));
@@ -1381,6 +1393,7 @@ void Jit64::arithcx(UGeckoInstruction inst)
}
else
{
+ BindRMOutput(d, a, inst.Rc || d != b, d == b || d == a);
if (d != b)
MOV(32, gpr.R(d), gpr.R(b));
if (add)
@@ -1426,14 +1439,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
int mask_size = inst.ME - inst.MB + 1;
gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s);
if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3)
{
+ gpr.BindToRegister(a, false);
LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0));
}
// common optimized case: byte/word extract
else if (simple_mask && !(inst.SH & (mask_size - 1)))
{
+ gpr.BindToRegister(a, a == s);
MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0));
needs_sext = false;
}
@@ -1441,12 +1455,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift &&
((mask >> inst.SH) == 0xff || (mask >> inst.SH) == 0xffff))
{
+ gpr.BindToRegister(a, a == s);
MOVZX(32, mask_size, gpr.RX(a), gpr.R(s));
SHL(32, gpr.R(a), Imm8(inst.SH));
needs_sext = inst.SH + mask_size >= 32;
}
else
{
+ // Cover all the cases for which we need the destination in a register.
+ BindRMOutput(a, s, inst.Rc || a != s || !(left_shift || right_shift), a == s);
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));
@@ -1634,7 +1651,7 @@ void Jit64::negx(UGeckoInstruction inst)
else
{
gpr.Lock(a, d);
- gpr.BindToRegister(d, a == d, true);
+ BindRMOutput(d, a, inst.Rc || a != d, a == d);
if (a != d)
MOV(32, gpr.R(d), gpr.R(a));
NEG(32, gpr.R(d));
@@ -1763,15 +1780,16 @@ void Jit64::srawix(UGeckoInstruction inst)
if (amount != 0)
{
gpr.Lock(a, s);
- gpr.BindToRegister(a, a == s, true);
if (!js.op->wantsCA)
{
+ BindRMOutput(a, s, inst.Rc || a != s, a == s);
if (a != s)
MOV(32, gpr.R(a), gpr.R(s));
SAR(32, gpr.R(a), Imm8(amount));
}
else
{
+ gpr.BindToRegister(a, a == s, true);
MOV(32, R(RSCRATCH), gpr.R(s));
if (a != s)
MOV(32, gpr.R(a), R(RSCRATCH));
--
2.1.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment