Discussion:
vex: r3335 - in /trunk/priv: host_amd64_defs.c host_amd64_defs.h main_main.c
(too old to reply)
s***@valgrind.org
2017-03-28 14:57:17 UTC
Permalink
Raw Message
Author: sewardj
Date: Tue Mar 28 15:57:17 2017
New Revision: 3335

Log:
Implement the most important cases for amd64 direct-reload optimisation:

cmpq $imm32, %vreg -> cmpq $imm32, (stack-slot-of-vreg)
orq %vreg, %reg -> orq (stack-slot-of-vreg), %reg

This is in support of "Bug 375839 - Temporary storage exhausted, when long
sequence of vfmadd231ps instructions to be executed", and reduces code size by
around 3% in that case.


Modified:
trunk/priv/host_amd64_defs.c
trunk/priv/host_amd64_defs.h
trunk/priv/main_main.c

Modified: trunk/priv/host_amd64_defs.c
==============================================================================
--- trunk/priv/host_amd64_defs.c (original)
+++ trunk/priv/host_amd64_defs.c Tue Mar 28 15:57:17 2017
@@ -1995,6 +1995,43 @@
}
}

+AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
+{
+ vassert(spill_off >= 0 && spill_off < 10000); /* sanity bound on the offset; the 10000 limit is arbitrary ("let's say") */
+ /* Try to rewrite i so that it uses vreg's stack slot at spill_off(%rbp) directly; returns the new instruction, or NULL. */
+ /* Case 1: MOV/OR/XOR with src=RMI_Reg, dst=Reg where src == vreg.
+ Convert to: src=RMI_Mem (spill_off off %rbp), dst=Reg, same op, same dst.
+ dst must not also be vreg; that is asserted rather than handled. */
+ if (i->tag == Ain_Alu64R
+ && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
+ || i->Ain.Alu64R.op == Aalu_XOR)
+ && i->Ain.Alu64R.src->tag == Armi_Reg
+ && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
+ vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
+ return AMD64Instr_Alu64R(
+ i->Ain.Alu64R.op,
+ AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
+ i->Ain.Alu64R.dst
+ );
+ }
+
+ /* Case 2: CMP with src=RMI_Imm, dst=Reg where dst == vreg.
+ Convert to an Alu64M: src=RI_Imm (same imm32), dst=Mem (spill_off off %rbp).
+ */
+ if (i->tag == Ain_Alu64R
+ && (i->Ain.Alu64R.op == Aalu_CMP)
+ && i->Ain.Alu64R.src->tag == Armi_Imm
+ && sameHReg(i->Ain.Alu64R.dst, vreg)) {
+ return AMD64Instr_Alu64M(
+ i->Ain.Alu64R.op,
+ AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
+ AMD64AMode_IR( spill_off, hregAMD64_RBP())
+ );
+ }
+
+ return NULL; /* neither form matched: no replacement instruction is produced */
+}
+

/* --------- The amd64 assembler (bleh.) --------- */

@@ -2607,6 +2644,39 @@
goto bad;
}
}
+ /* Alu64M: only CMP is handled so far; any other op (ADD/SUB/ADC/SBB/AND/OR/XOR)
+ goes to 'bad'. MUL is not allowed here. (This is derived from the x86 version of same). */
+ opc = subopc_imm = opc_imma = 0;
+ switch (i->Ain.Alu64M.op) {
+ case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
+ default: goto bad;
+ }
+ switch (i->Ain.Alu64M.src->tag) {
+ /*
+ case Xri_Reg:
+ *p++ = toUChar(opc);
+ p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
+ i->Xin.Alu32M.dst);
+ goto done;
+ */
+ case Ari_Imm:
+ if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
+ *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+ *p++ = 0x83;
+ p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+ *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
+ goto done;
+ } else {
+ *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
+ *p++ = 0x81;
+ p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
+ p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
+ goto done;
+ }
+ default:
+ goto bad;
+ }
+
break;

case Ain_Sh64:

Modified: trunk/priv/host_amd64_defs.h
==============================================================================
--- trunk/priv/host_amd64_defs.h (original)
+++ trunk/priv/host_amd64_defs.h Tue Mar 28 15:57:17 2017
@@ -802,6 +802,9 @@
extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );

+extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i,
+ HReg vreg, Short spill_off );
+
extern const RRegUniverse* getRRegUniverse_AMD64 ( void );

extern HInstrArray* iselSB_AMD64 ( const IRSB*,

Modified: trunk/priv/main_main.c
==============================================================================
--- trunk/priv/main_main.c (original)
+++ trunk/priv/main_main.c Tue Mar 28 15:57:17 2017
@@ -433,6 +433,7 @@
mapRegs = (__typeof__(mapRegs)) AMD64FN(mapRegs_AMD64Instr);
genSpill = (__typeof__(genSpill)) AMD64FN(genSpill_AMD64);
genReload = (__typeof__(genReload)) AMD64FN(genReload_AMD64);
+ directReload = (__typeof__(directReload)) AMD64FN(directReload_AMD64);
ppInstr = (__typeof__(ppInstr)) AMD64FN(ppAMD64Instr);
ppReg = (__typeof__(ppReg)) AMD64FN(ppHRegAMD64);
iselSB = AMD64FN(iselSB_AMD64);

Loading...