Created
December 15, 2014 03:52
-
-
Save FioraAeterna/2caa0fecbd29b2137c23 to your computer and use it in GitHub Desktop.
Dolphin: use memory operands wherever possible
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| From 7df6919fc55d0f54ef9e8b87540a5b41718e7dc7 Mon Sep 17 00:00:00 2001 | |
| From: Fiora <[email protected]> | |
| Date: Mon, 27 Oct 2014 04:04:00 -0700 | |
| Subject: [PATCH] JIT: use memory destination operands wherever possible | |
| Avoids lots of redundant instructions in the case of destination | |
| operands that will never be used again in the block. | |
| --- | |
| Source/Core/Core/PowerPC/Jit64/Jit.h | 1 + | |
| Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 100 +++++++++++++++---------- | |
| 2 files changed, 60 insertions(+), 41 deletions(-) | |
| diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h | |
| index c583d83..d8b3ff8 100644 | |
| --- a/Source/Core/Core/PowerPC/Jit64/Jit.h | |
| +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h | |
| @@ -123,6 +123,7 @@ public: | |
| // Use to extract bytes from a register using the regcache. offset is in bytes. | |
| Gen::OpArg ExtractFromReg(int reg, int offset); | |
| void AndWithMask(Gen::X64Reg reg, u32 mask); | |
| + void BindRMOutput(int output, int input, bool force_bind = false, bool do_load = true); | |
| bool CheckMergedBranch(int crf); | |
| void DoMergedBranch(); | |
| void DoMergedBranchCondition(); | |
| diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | |
| index 3574fa4..507b982 100644 | |
| --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | |
| +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | |
| @@ -195,6 +195,24 @@ void Jit64::AndWithMask(X64Reg reg, u32 mask) | |
| AND(32, R(reg), Imm32(mask)); | |
| } | |
| +// For operations where memory destination operands are supported: bind the output only when it is | |
| +// marked in gprInUse or force_bind is set; otherwise bind the input (if it is a different register). | |
| +void Jit64::BindRMOutput(int output, int input, bool force_bind, bool do_load) | |
| +{ | |
| + if ((js.op->gprInUse & (1 << output)) || force_bind) | |
| + { | |
| + gpr.BindToRegister(output, do_load); | |
| + } | |
| + else | |
| + { | |
| + if (output != input) | |
| + gpr.BindToRegister(input, true, false); | |
| + if (!do_load) | |
| + gpr.DiscardRegContentsIfCached(output); | |
| + gpr.KillImmediate(output, do_load, true); | |
| + } | |
| +} | |
| + | |
| // Following static functions are used in conjunction with regimmop | |
| static u32 Add(u32 a, u32 b) | |
| { | |
| @@ -230,7 +248,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void | |
| } | |
| else if (a == d) | |
| { | |
| - gpr.BindToRegister(d, true); | |
| + BindRMOutput(d, d, Rc); | |
| (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; | |
| } | |
| else | |
| @@ -280,7 +298,7 @@ void Jit64::reg_imm(UGeckoInstruction inst) | |
| else if (inst.SIMM_16 == 0 && d != a && a != 0) | |
| { | |
| gpr.Lock(a, d); | |
| - gpr.BindToRegister(d, false, true); | |
| + BindRMOutput(d, a, false, false); | |
| MOV(32, gpr.R(d), gpr.R(a)); | |
| gpr.UnlockAll(); | |
| } | |
| @@ -615,34 +633,18 @@ void Jit64::boolX(UGeckoInstruction inst) | |
| { | |
| if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */)) | |
| { | |
| + gpr.Lock(a, s); | |
| + BindRMOutput(a, s, inst.Rc, a == s); | |
| if (a != s) | |
| - { | |
| - gpr.Lock(a,s); | |
| - gpr.BindToRegister(a, false, true); | |
| MOV(32, gpr.R(a), gpr.R(s)); | |
| - } | |
| - else if (inst.Rc) | |
| - { | |
| - gpr.BindToRegister(a, true, false); | |
| - } | |
| needs_test = true; | |
| } | |
| else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */)) | |
| { | |
| + gpr.Lock(a, s); | |
| + BindRMOutput(a, s, inst.Rc || a != s, a == s); | |
| if (a != s) | |
| - { | |
| - gpr.Lock(a,s); | |
| - gpr.BindToRegister(a, false, true); | |
| MOV(32, gpr.R(a), gpr.R(s)); | |
| - } | |
| - else if (inst.Rc) | |
| - { | |
| - gpr.BindToRegister(a, true, true); | |
| - } | |
| - else | |
| - { | |
| - gpr.KillImmediate(a, true, true); | |
| - } | |
| NOT(32, gpr.R(a)); | |
| needs_test = true; | |
| } | |
| @@ -661,66 +663,73 @@ void Jit64::boolX(UGeckoInstruction inst) | |
| } | |
| else if ((a == s) || (a == b)) | |
| { | |
| - gpr.Lock(a,((a == s) ? b : s)); | |
| - OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s)); | |
| - gpr.BindToRegister(a, true, true); | |
| + int operand = a == s ? b : s; | |
| + gpr.Lock(a, operand); | |
| if (inst.SUBOP10 == 28) // andx | |
| { | |
| - AND(32, gpr.R(a), operand); | |
| + BindRMOutput(a, operand, inst.Rc); | |
| + AND(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else if (inst.SUBOP10 == 476) // nandx | |
| { | |
| - AND(32, gpr.R(a), operand); | |
| + gpr.BindToRegister(a, true, true); | |
| + AND(32, gpr.R(a), gpr.R(operand)); | |
| NOT(32, gpr.R(a)); | |
| needs_test = true; | |
| } | |
| else if (inst.SUBOP10 == 60) // andcx | |
| { | |
| + gpr.BindToRegister(a, true, true); | |
| if (a == b) | |
| { | |
| NOT(32, gpr.R(a)); | |
| - AND(32, gpr.R(a), operand); | |
| + AND(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else | |
| { | |
| - MOV(32, R(RSCRATCH), operand); | |
| + MOV(32, R(RSCRATCH), gpr.R(operand)); | |
| NOT(32, R(RSCRATCH)); | |
| AND(32, gpr.R(a), R(RSCRATCH)); | |
| } | |
| } | |
| else if (inst.SUBOP10 == 444) // orx | |
| { | |
| - OR(32, gpr.R(a), operand); | |
| + BindRMOutput(a, operand, inst.Rc); | |
| + OR(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else if (inst.SUBOP10 == 124) // norx | |
| { | |
| - OR(32, gpr.R(a), operand); | |
| + gpr.BindToRegister(a, true, true); | |
| + OR(32, gpr.R(a), gpr.R(operand)); | |
| NOT(32, gpr.R(a)); | |
| needs_test = true; | |
| } | |
| else if (inst.SUBOP10 == 412) // orcx | |
| { | |
| + gpr.BindToRegister(a, true, true); | |
| if (a == b) | |
| { | |
| NOT(32, gpr.R(a)); | |
| - OR(32, gpr.R(a), operand); | |
| + OR(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else | |
| { | |
| - MOV(32, R(RSCRATCH), operand); | |
| + MOV(32, R(RSCRATCH), gpr.R(operand)); | |
| NOT(32, R(RSCRATCH)); | |
| OR(32, gpr.R(a), R(RSCRATCH)); | |
| } | |
| } | |
| else if (inst.SUBOP10 == 316) // xorx | |
| { | |
| - XOR(32, gpr.R(a), operand); | |
| + BindRMOutput(a, operand, inst.Rc); | |
| + XOR(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else if (inst.SUBOP10 == 284) // eqvx | |
| { | |
| + gpr.BindToRegister(a, true, true); | |
| NOT(32, gpr.R(a)); | |
| - XOR(32, gpr.R(a), operand); | |
| + XOR(32, gpr.R(a), gpr.R(operand)); | |
| } | |
| else | |
| { | |
| @@ -869,19 +878,21 @@ void Jit64::subfx(UGeckoInstruction inst) | |
| else | |
| { | |
| gpr.Lock(a, b, d); | |
| - gpr.BindToRegister(d, (d == a || d == b), true); | |
| if (d == b) | |
| { | |
| + BindRMOutput(d, a, inst.Rc); | |
| SUB(32, gpr.R(d), gpr.R(a)); | |
| } | |
| else if (d == a) | |
| { | |
| + gpr.BindToRegister(d, true, true); | |
| MOV(32, R(RSCRATCH), gpr.R(a)); | |
| MOV(32, gpr.R(d), gpr.R(b)); | |
| SUB(32, gpr.R(d), R(RSCRATCH)); | |
| } | |
| else | |
| { | |
| + gpr.BindToRegister(d, false, true); | |
| MOV(32, gpr.R(d), gpr.R(b)); | |
| SUB(32, gpr.R(d), gpr.R(a)); | |
| } | |
| @@ -1265,7 +1276,7 @@ void Jit64::addx(UGeckoInstruction inst) | |
| { | |
| int operand = ((d == a) ? b : a); | |
| gpr.Lock(a, b, d); | |
| - gpr.BindToRegister(d, true); | |
| + BindRMOutput(d, operand, inst.Rc); | |
| ADD(32, gpr.R(d), gpr.R(operand)); | |
| if (inst.OE) | |
| GenerateOverflow(); | |
| @@ -1363,17 +1374,18 @@ void Jit64::arithcx(UGeckoInstruction inst) | |
| bool add = !!(inst.SUBOP10 & 2); // add or sub | |
| int a = inst.RA, b = inst.RB, d = inst.RD; | |
| gpr.Lock(a, b, d); | |
| - gpr.BindToRegister(d, d == a || d == b, true); | |
| if (d == a && d != b) | |
| { | |
| if (add) | |
| { | |
| + BindRMOutput(d, b, inst.Rc); | |
| ADD(32, gpr.R(d), gpr.R(b)); | |
| } | |
| else | |
| { | |
| // special case, because sub isn't reversible | |
| + gpr.BindToRegister(d, true, true); | |
| MOV(32, R(RSCRATCH), gpr.R(a)); | |
| MOV(32, gpr.R(d), gpr.R(b)); | |
| SUB(32, gpr.R(d), R(RSCRATCH)); | |
| @@ -1381,6 +1393,7 @@ void Jit64::arithcx(UGeckoInstruction inst) | |
| } | |
| else | |
| { | |
| + BindRMOutput(d, a, inst.Rc || d != b, d == b || d == a); | |
| if (d != b) | |
| MOV(32, gpr.R(d), gpr.R(b)); | |
| if (add) | |
| @@ -1426,14 +1439,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst) | |
| int mask_size = inst.ME - inst.MB + 1; | |
| gpr.Lock(a, s); | |
| - gpr.BindToRegister(a, a == s); | |
| if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3) | |
| { | |
| + gpr.BindToRegister(a, false); | |
| LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0)); | |
| } | |
| // common optimized case: byte/word extract | |
| else if (simple_mask && !(inst.SH & (mask_size - 1))) | |
| { | |
| + gpr.BindToRegister(a, a == s); | |
| MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0)); | |
| needs_sext = false; | |
| } | |
| @@ -1441,12 +1455,15 @@ void Jit64::rlwinmx(UGeckoInstruction inst) | |
| else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift && | |
| ((mask >> inst.SH) == 0xff || (mask >> inst.SH) == 0xffff)) | |
| { | |
| + gpr.BindToRegister(a, a == s); | |
| MOVZX(32, mask_size, gpr.RX(a), gpr.R(s)); | |
| SHL(32, gpr.R(a), Imm8(inst.SH)); | |
| needs_sext = inst.SH + mask_size >= 32; | |
| } | |
| else | |
| { | |
| + // Cover all the cases for which we need the destination in a register. | |
| + BindRMOutput(a, s, inst.Rc || a != s || !(left_shift || right_shift), a == s); | |
| if (a != s) | |
| MOV(32, gpr.R(a), gpr.R(s)); | |
| @@ -1634,7 +1651,7 @@ void Jit64::negx(UGeckoInstruction inst) | |
| else | |
| { | |
| gpr.Lock(a, d); | |
| - gpr.BindToRegister(d, a == d, true); | |
| + BindRMOutput(d, a, inst.Rc || a != d, a == d); | |
| if (a != d) | |
| MOV(32, gpr.R(d), gpr.R(a)); | |
| NEG(32, gpr.R(d)); | |
| @@ -1763,15 +1780,16 @@ void Jit64::srawix(UGeckoInstruction inst) | |
| if (amount != 0) | |
| { | |
| gpr.Lock(a, s); | |
| - gpr.BindToRegister(a, a == s, true); | |
| if (!js.op->wantsCA) | |
| { | |
| + BindRMOutput(a, s, inst.Rc || a != s, a == s); | |
| if (a != s) | |
| MOV(32, gpr.R(a), gpr.R(s)); | |
| SAR(32, gpr.R(a), Imm8(amount)); | |
| } | |
| else | |
| { | |
| + gpr.BindToRegister(a, a == s, true); | |
| MOV(32, R(RSCRATCH), gpr.R(s)); | |
| if (a != s) | |
| MOV(32, gpr.R(a), R(RSCRATCH)); | |
| -- | |
| 2.1.1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment