Index: jitrino/config/em64t/server.emconf =================================================================== --- jitrino/config/em64t/server.emconf (revision 527094) +++ jitrino/config/em64t/server.emconf (working copy) @@ -47,28 +47,21 @@ -XDjit.SD1_OPT.path.optimizer=ssa,simplify,dce,uce,vp_instrument,devirt_virtual,edge_instrument,dessa,statprof,markglobals -XDjit.SD1_OPT.path.devirt_virtual=devirt --XDjit.SD1_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method +-XDjit.SD1_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,constraints,dce2,regalloc,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method -XDjit.SD1_OPT.path.dce1=cg_dce -XDjit.SD1_OPT.path.dce2=cg_dce --XDjit.SD1_OPT.path.regalloc=bp_regalloc1,bp_regalloc2 --XDjit.SD1_OPT.path.bp_regalloc1=bp_regalloc --XDjit.SD1_OPT.path.bp_regalloc2=bp_regalloc -XDjit.SD1_OPT.arg.codegen.dce1.early=yes --XDjit.SD1_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP --XDjit.SD1_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM +-XDjit.SD1_OPT.path.regalloc=webmaker,cg_regalloc,spillgen +-XDjit.SD1_OPT.arg.codegen.regalloc.webmaker.calc=true - -XDjit.SD2_OPT.path=opt_init,translator,optimizer,hir2lir,codegen -XDjit.SD2_OPT.path.optimizer=ssa,simplify,dce,uce,devirt_virtual,edge_annotate,unguard,devirt_intf,inline,uce,purge,simplify,dce,uce,so2-,simplify,dce,uce,escape,hvn,dce,uce,inline_helpers,purge,simplify,uce,dce,dessa,statprof,peel,ssa,hvn,simplify,dce,uce,lower,dce,uce,memopt,reassoc,dce,uce,hvn,dce,uce,abcd,dce,uce,gcm,dessa,fastArrayFill,statprof,markglobals --XDjit.SD2_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,spillgen,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method +-XDjit.SD2_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l-,early_prop-,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,layout,copy,rce-,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method -XDjit.SD2_OPT.path.dce1=cg_dce -XDjit.SD2_OPT.path.dce2=cg_dce --XDjit.SD2_OPT.path.regalloc=bp_regalloc1,bp_regalloc2 --XDjit.SD2_OPT.path.bp_regalloc1=bp_regalloc --XDjit.SD2_OPT.path.bp_regalloc2=bp_regalloc #devirt configuration -XDjit.SD2_OPT.path.devirt_virtual=devirt @@ -130,10 +123,11 @@ -XDjit.SD2_OPT.arg.codegen.dce1.early=yes --XDjit.SD2_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP --XDjit.SD2_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM -XDjit.arg.codegen.emitter.align=0 +-XDjit.SD2_OPT.path.regalloc=webmaker,cg_regalloc,spillgen +-XDjit.SD2_OPT.arg.codegen.regalloc.webmaker.calc=true + #system properties -Djava.compiler=server Index: jitrino/config/ia32/server.emconf =================================================================== --- jitrino/config/ia32/server.emconf (revision 527094) +++ jitrino/config/ia32/server.emconf (working copy) @@ -47,29 +47,23 @@ -XDjit.SD1_OPT.path.optimizer=ssa,simplify,dce,uce,vp_instrument,devirt_virtual,edge_instrument,dessa,statprof,markglobals -XDjit.SD1_OPT.path.devirt_virtual=devirt --XDjit.SD1_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method +-XDjit.SD1_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method -XDjit.SD1_OPT.path.dce1=cg_dce -XDjit.SD1_OPT.path.dce2=cg_dce --XDjit.SD1_OPT.path.regalloc=bp_regalloc1,bp_regalloc2 --XDjit.SD1_OPT.path.bp_regalloc1=bp_regalloc --XDjit.SD1_OPT.path.bp_regalloc2=bp_regalloc -XDjit.SD1_OPT.arg.codegen.dce1.early=yes --XDjit.SD1_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP --XDjit.SD1_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM -XDjit.SD1_OPT.arg.codegen.btr.insertCMOVs=no -XDjit.SD1_OPT.arg.codegen.btr.removeConstCompare=yes +-XDjit.SD1_OPT.path.regalloc=webmaker,cg_regalloc,spillgen +-XDjit.SD1_OPT.arg.codegen.regalloc.webmaker.calc=true -XDjit.SD2_OPT.path=opt_init,translator,optimizer,hir2lir,codegen -XDjit.SD2_OPT.path.optimizer=ssa,simplify,dce,uce,devirt_virtual,edge_annotate,unguard,devirt_intf,inline,uce,purge,simplify,dce,uce,lazyexc,so2-,simplify,dce,uce,escape,inline_helpers,purge,simplify,uce,dce,dessa,statprof,peel,ssa,hvn,simplify,dce,uce,lower,dce,uce,memopt,reassoc,dce,uce,hvn,dce,uce,abcd,dce,uce,gcm,dessa,fastArrayFill,statprof,markglobals --XDjit.SD2_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method +-XDjit.SD2_OPT.path.codegen=lock_method,bbp,btr,gcpoints,cafl,dce1,i8l,api_magic,early_prop,peephole,itrace-,native,cg_fastArrayFill,constraints,dce2,regalloc,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method -XDjit.SD2_OPT.path.dce1=cg_dce -XDjit.SD2_OPT.path.dce2=cg_dce --XDjit.SD2_OPT.path.regalloc=bp_regalloc1,bp_regalloc2 --XDjit.SD2_OPT.path.bp_regalloc1=bp_regalloc --XDjit.SD2_OPT.path.bp_regalloc2=bp_regalloc #devirt configuration -XDjit.SD2_OPT.path.devirt_virtual=devirt @@ -129,12 +123,12 @@ -XDjit.SD2_OPT.arg.optimizer.inline_helpers.instanceOf_hotnessPercent=1 -XDjit.SD2_OPT.arg.codegen.dce1.early=yes --XDjit.SD2_OPT.arg.codegen.regalloc.bp_regalloc1.regs=ALL_GP --XDjit.SD2_OPT.arg.codegen.regalloc.bp_regalloc2.regs=ALL_XMM -XDjit.SD2_OPT.arg.codegen.btr.insertCMOVs=no -XDjit.SD2_OPT.arg.codegen.btr.removeConstCompare=yes -XDjit.arg.codegen.emitter.align=4 +-XDjit.SD2_OPT.path.regalloc=webmaker,cg_regalloc,spillgen +-XDjit.SD2_OPT.arg.codegen.regalloc.webmaker.calc=true #system properties -Djava.compiler=server Index: jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp (working copy) @@ -19,7 +19,7 @@ * @version $Revision: 1.11.14.2.4.3 $ */ -#include "Ia32IRManager.h" +#include "Ia32ConstraintsResolver.h" #include "Ia32Printer.h" namespace Jitrino @@ -207,108 +207,6 @@ * For more details please refer to ConstraintsResolverImpl source code */ -class ConstraintsResolverImpl -{ -public: - ConstraintsResolverImpl(IRManager &irm) - :irManager(irm), - memoryManager(irManager.getOpndCount()*16, "ConstraintsResolverImpl"), - basicBlocks(memoryManager, 0), originalOpndCount(0), - liveOpnds(memoryManager,0), - liveAtDispatchBlockEntry(memoryManager,0), - needsOriginalOpnd(memoryManager,0), - hoveringOpnds(memoryManager,0), - opndReplaceWorkset(memoryManager,0), - opndUsage(memoryManager,0) - { - } - - void run(); - -private: - /** Get the priority of a the node for sorting in createBasicBlockArray */ - double getBasicBlockPriority(Node * node); - /** Fills the basicBlocks array and orders it according to block exec count (hottest first) */ - void createBasicBlockArray(); - /** Fills the liveAtDispatchBlockEntry bit set with operands live at dispatch node entries */ - void calculateLiveAtDispatchBlockEntries(); - /** Pre-sets calculated constraints for each operand */ - void calculateStartupOpndConstraints(); - /** Scans basicBlocks array and calls resolveConstraints(BasicBlock * bb) for each entry */ - void resolveConstraints(); - /** Traverses instructions of bb and calls resolveConstraints(Inst *) - * for each inst - */ - void resolveConstraints(Node* bb); - /** - Main logic of constraint resolution for each instrution - */ - void resolveConstraintsWithOG(Inst * inst); - - /** returns constraint describing call-safe locations for opnd in CallInst inst */ - Constraint getCalleeSaveConstraint(Inst * inst, Opnd * opnd); - /** returns constraint describing safe locations for operands live at dispatch node entries */ - Constraint getDispatchEntryConstraint(Opnd * opnd); - - static bool constraintIsWorse(Constraint cnew, Constraint cold, unsigned normedBBExecCount, - unsigned splitThresholdForNoRegs, unsigned splitThresholdFor1Reg, unsigned splitThresholdFor4Regs - ); - - - - /** Reference to IRManager */ - IRManager& irManager; - - /** Private memory manager for this algorithm */ - MemoryManager memoryManager; - - /** Array of basic blocks to be handled */ - Nodes basicBlocks; - - /** result of irManager.getOpndCount before the pass */ - uint32 originalOpndCount; - - /** Current live set, updated as usual for each instruction in resolveConstraints(Inst*) */ - BitSet liveOpnds; - - /** Bit set of operands live at dispatch node entries */ - BitSet liveAtDispatchBlockEntry; - - /** Bit set of operands for which original operand should be used wherever possible. - * Currently this is only for operands which are live at dispatch block entries. - */ - BitSet needsOriginalOpnd; - - /** Temporary bit set of hovering operands (live across call sites) - * Is initialized and used only during resolveConstraints(Inst*) - */ - BitSet hoveringOpnds; - - /** An array of current substitutions for operands - * Is filled as a result of operand splitting. - * Reset for each basic blocks (all replacements are local within basic blocks) - */ - StlVector opndReplaceWorkset; - - - StlVector opndUsage; - - unsigned callSplitThresholdForNoRegs; - unsigned callSplitThresholdFor1Reg; - unsigned callSplitThresholdFor4Regs; - - unsigned defSplitThresholdForNoRegs; - unsigned defSplitThresholdFor1Reg; - unsigned defSplitThresholdFor4Regs; - - unsigned useSplitThresholdForNoRegs; - unsigned useSplitThresholdFor1Reg; - unsigned useSplitThresholdFor4Regs; - - friend class ConstraintsResolver; -}; - - //_________________________________________________________________________________________________ Constraint ConstraintsResolverImpl::getCalleeSaveConstraint(Inst * inst, Opnd * opnd) { @@ -331,7 +229,8 @@ void ConstraintsResolverImpl::run() { // Set all instruction constraints for EntryPoints, CALLs and RETs - irManager.applyCallingConventions(); + if (!second) + irManager.applyCallingConventions(); // Initialization originalOpndCount=irManager.getOpndCount(); liveOpnds.resizeClear(originalOpndCount); @@ -682,25 +581,16 @@ // call the private implementation of the algorithm ConstraintsResolverImpl impl(*irManager); - impl.callSplitThresholdForNoRegs = (unsigned)-1; // always getArg("callSplitThresholdForNoRegs", impl.callSplitThresholdForNoRegs); - impl.callSplitThresholdFor1Reg = 1; // for very cold code getArg("callSplitThresholdFor1Reg", impl.callSplitThresholdFor1Reg); - impl.callSplitThresholdFor4Regs = 1; // for very cold code getArg("callSplitThresholdFor4Regs", impl.callSplitThresholdFor4Regs); - impl.defSplitThresholdForNoRegs = 0; // never getArg("defSplitThresholdForNoRegs", impl.defSplitThresholdForNoRegs); - impl.defSplitThresholdFor1Reg = 0; // never getArg("defSplitThresholdFor1Reg", impl.defSplitThresholdFor1Reg); - impl.defSplitThresholdFor4Regs = 0; // never getArg("defSplitThresholdFor4Regs", impl.defSplitThresholdFor4Regs); - impl.useSplitThresholdForNoRegs = 0; // never getArg("useSplitThresholdForNoRegs", impl.useSplitThresholdForNoRegs); - impl.useSplitThresholdFor1Reg = 0; // never getArg("useSplitThresholdFor1Reg", impl.useSplitThresholdFor1Reg); - impl.useSplitThresholdFor4Regs = 0; // never getArg("useSplitThresholdFor4Regs", impl.useSplitThresholdFor4Regs); impl.run(); Index: jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h =================================================================== --- jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h (revision 0) +++ jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.h (revision 0) @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Vyacheslav P. Shakin + * @version $Revision: 1.11.14.2.4.3 $ + */ + +#include "Ia32IRManager.h" +//#include "Ia32Printer.h" + +namespace Jitrino +{ +namespace Ia32{ + +//======================================================================================== +// class Ia32ConstraintsResolver +//======================================================================================== +/** + * class Ia32ConstraintsResolver performs resolution of operand constraints + * and assigns calculated constraints (Opnd::ConstraintKind_Calculated) to operands. + * The resulting calculated constraints of operands determine allowable physical location + * for the operand. + * + * This transformer allows to insert operands into instructions before it + * regardless instruction constraints except that Initial constraints of explicit + * instruction operands must have non-null intersections with corresponding constraints + * of at least one opcode group of the instruction. + * + * ConstraintResolver analyzes instruction constraints and splits operands when necessary. + * + * This transformer ensures that + * 1) All instruction constraints for EntryPoints, CALLs and RETs + * are set appropriately (IRManager::applyCallingConventions()) + * 2) All operands has non-null calculated constraints + * 3) All operands fits into instructions they are used in (in terms of instruction constraints) + * For example: + * Original code piece: + * I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) + * I32: MULSD .s65.:double,.t2:double + * I33: RET t66(0):int16 (AU:s65:double) + * + * RET imposes constraint on s65 requiring to place it into FP0 register + * (its FP0D alias in this particular case) + * MULSD imposes constraint on s65 requiring to place it into XMM register + * + * After the pass: + * I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) + * I32: MULSD .s65.:double,.t2:double + * I46: (AD:t75:double) =CopyPseudoInst (AU:s65:double) + * I33: RET t66(20):int16 (AU:t75:double) + * + * Thus, ConstraintResolver inserted I46 splitting s65 to s65 and t75 + * s65 is assigned with Mem|XMM calculated constraint and t75 + * is assigned with FP0D calculated calculated constraint + * + * 4) If the live range of an operand crosses a call site and the operand is not redefined + * in the call site, the calculated constraint of the operand is narrowed the callee-save regs + * or memory (stack) + * + * 5) If the operand (referred as original operand here) is live at entry of a catch handler + * then necessary operand splitting is performed as close as possible to the instruction + * which caused the splitting and original operand is used before and after the instruction. + * + * The main principle of the algorithm is anding of instruction constraints into + * operand calculated constraints and splitting operands to ensure that the calculated constraint + * is not null + * + * This transformer must be inserted before register allocator which relies on + * calculated operand constraints. + * + * The implementation of this transformer is located in the ConstraintResolverImpl class. + * + */ + + +//======================================================================================== +// class ConstraintsResolverImpl +//======================================================================================== + +/** + * class Ia32ConstraintsResolverImpl is an implementation of simple constraint resolution algorithm + * The algorithm takes one-pass over CFG. + * + * The algorithm works as follows: + * + * 1) Creates an array of basic blocks and orders by bb->getExecCount() + * in createBasicBlockArray(). + * Thus, the algorithm handles hottest basic blocks first and constraints are assigned to operands first + * from the most frequently used instructions + * + * 2) Collects a bit vector of all operands live at entries of all dispatch node entries + * in calculateLiveAtDispatchBlockEntries() + * + * 3) For all operands: + * - If an operand has already been assigned to some location + * (its location constraint is not null) the calculated constraint is set to + * the location constraint + * + * - If an operand is live at entry of a dispatch node + * the calculated constraint is set to the constraint + * preserving operand values during exception throwing + * This constraint is returned by getDispatchEntryConstraint + * In fact this is the constriant for the DRL calling convention + * + * This is done in calculateStartupOpndConstraints() + * Originally all calculateed constraints are equial to Initial constraints + * + * 4) Walks through all basic blocks collected and arranged at step 1 + * in resolveConstraints() + * + * The opndReplaceWorkset array of operand replacements is maintained + * (indexed by from-operand id). + * + * This is the array of current replacement for operands + * and is reset for each basic block (local within basic blocks) + * + * This array is filled as a result of operand splitting and indicates + * which operand must be used instead of original ones for all the instructions + * above the one caused splitting + * + * 4.1) Walks throw all instruction of a basic block in backward order + * in resolveConstraints(BasicBlock * bb) + * 4.1.1) resolves constraints for each instruction + * in resolveConstraints(Inst * inst); + * + * To do this already collected calculated constraint of + * either original operand or its current replacement is anded + * with instruction constraint for this operand occurence and + * if the result is null, new operand is created and substituted instead + * + * 4.1.1.1) All def operands of the isntruction are traversed + * and operand splitting is performed after the instruction (when necessary) + * def&use cases are also handled during this step + * 4.1.1.2) If the instruction is CALL, all hovering operands of + * the isntruction are traversed. + * + * Hovering operands are operands which are live across a call site and are not + * redefined in the call site + * This step ensures operands are saved in callee-save regs or memory + * and takes into account whether an operand is live at dispatch node entries + * + * Operand splitting is performed before the instruction (when necessary) + * 4.1.1.3) All use operands of the instruction are traversed + * and operand splitting is performed before the instruction (when necessary) + * + * The current implementation doesn't deal properly with conditional memory constraints. + * I.e. it doesn't resolve properly things like ADD m, m when both operands are already + * assigned. + * + * For more details please refer to ConstraintsResolverImpl source code + */ + +class ConstraintsResolverImpl +{ +public: + ConstraintsResolverImpl(IRManager &irm, bool _second = false) + :irManager(irm), + memoryManager(irManager.getOpndCount()*16, "ConstraintsResolverImpl"), + basicBlocks(memoryManager, 0), originalOpndCount(0), + liveOpnds(memoryManager,0), + liveAtDispatchBlockEntry(memoryManager,0), + needsOriginalOpnd(memoryManager,0), + hoveringOpnds(memoryManager,0), + opndReplaceWorkset(memoryManager,0), + opndUsage(memoryManager,0), + + callSplitThresholdForNoRegs((unsigned)-1), // always + callSplitThresholdFor1Reg(1), // for very cold code + callSplitThresholdFor4Regs(1), // for very cold code + defSplitThresholdForNoRegs(0), // never + defSplitThresholdFor1Reg(0), // never + defSplitThresholdFor4Regs(0), // never + useSplitThresholdForNoRegs(0), // never + useSplitThresholdFor1Reg(0), // never + useSplitThresholdFor4Regs(0), // never + + second(_second) + { + } + + void run(); + +private: + /** Get the priority of a the node for sorting in createBasicBlockArray */ + double getBasicBlockPriority(Node * node); + /** Fills the basicBlocks array and orders it according to block exec count (hottest first) */ + void createBasicBlockArray(); + /** Fills the liveAtDispatchBlockEntry bit set with operands live at dispatch node entries */ + void calculateLiveAtDispatchBlockEntries(); + /** Pre-sets calculated constraints for each operand */ + void calculateStartupOpndConstraints(); + /** Scans basicBlocks array and calls resolveConstraints(BasicBlock * bb) for each entry */ + void resolveConstraints(); + /** Traverses instructions of bb and calls resolveConstraints(Inst *) + * for each inst + */ + void resolveConstraints(Node* bb); + /** + Main logic of constraint resolution for each instrution + */ + void resolveConstraintsWithOG(Inst * inst); + + /** returns constraint describing call-safe locations for opnd in CallInst inst */ + Constraint getCalleeSaveConstraint(Inst * inst, Opnd * opnd); + /** returns constraint describing safe locations for operands live at dispatch node entries */ + Constraint getDispatchEntryConstraint(Opnd * opnd); + + static bool constraintIsWorse(Constraint cnew, Constraint cold, unsigned normedBBExecCount, + unsigned splitThresholdForNoRegs, unsigned splitThresholdFor1Reg, unsigned splitThresholdFor4Regs + ); + + + + /** Reference to IRManager */ + IRManager& irManager; + + /** Private memory manager for this algorithm */ + MemoryManager memoryManager; + + /** Array of basic blocks to be handled */ + Nodes basicBlocks; + + /** result of irManager.getOpndCount before the pass */ + uint32 originalOpndCount; + + /** Current live set, updated as usual for each instruction in resolveConstraints(Inst*) */ + BitSet liveOpnds; + + /** Bit set of operands live at dispatch node entries */ + BitSet liveAtDispatchBlockEntry; + + /** Bit set of operands for which original operand should be used wherever possible. + * Currently this is only for operands which are live at dispatch block entries. + */ + BitSet needsOriginalOpnd; + + /** Temporary bit set of hovering operands (live across call sites) + * Is initialized and used only during resolveConstraints(Inst*) + */ + BitSet hoveringOpnds; + + /** An array of current substitutions for operands + * Is filled as a result of operand splitting. + * Reset for each basic blocks (all replacements are local within basic blocks) + */ + StlVector opndReplaceWorkset; + + + StlVector opndUsage; + + unsigned callSplitThresholdForNoRegs; + unsigned callSplitThresholdFor1Reg; + unsigned callSplitThresholdFor4Regs; + + unsigned defSplitThresholdForNoRegs; + unsigned defSplitThresholdFor1Reg; + unsigned defSplitThresholdFor4Regs; + + unsigned useSplitThresholdForNoRegs; + unsigned useSplitThresholdFor1Reg; + unsigned useSplitThresholdFor4Regs; + + bool second; + + friend class ConstraintsResolver; +}; + +}}; //namespace Ia32 + Index: jitrino/src/codegenerator/ia32/Ia32GCMap.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32GCMap.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32GCMap.cpp (working copy) @@ -135,7 +135,7 @@ for (int i = liveOpnds.getNext(); i != -1; i = liveOpnds.getNext()) { Opnd* opnd = irm.getOpnd(i); #ifdef _DEBUG - if (opnd->getType()->isUnmanagedPtr()) checkManaged2UnmanagedConv(irm, opnd); + if (opnd->getType()->isUnmanagedPtr() && opnd!=callRes) checkManaged2UnmanagedConv(irm, opnd); #endif if (callRes == opnd) { Index: jitrino/src/codegenerator/ia32/Ia32Inst.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32Inst.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32Inst.cpp (working copy) @@ -276,7 +276,7 @@ } //_________________________________________________________________________________________________ -Constraint Inst::getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size) +Constraint Inst::getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size) const { Constraint c(OpndKind_Any); if (idx < opndCount){ @@ -342,22 +342,22 @@ assert(newOpnd != NULL); for (uint32 i=0, n=getOpndCount(); ireplaceMemOpndSubOpnd(oldOpnd, newOpnd); } + if ((opndRoleMask&OpndRole_OpndLevel)!=0 && + (opndRoleMask&OpndRole_Use)!=0) + replaced |= opnd->replaceMemOpndSubOpnd(oldOpnd, newOpnd); } - return replaced; } Index: jitrino/src/codegenerator/ia32/Ia32Inst.h =================================================================== --- jitrino/src/codegenerator/ia32/Ia32Inst.h (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32Inst.h (working copy) @@ -658,7 +658,7 @@ * @param size - if this parameter is not OpndSize_Null the resulted constraint * is adjusted to the requested size. */ - Constraint getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size = OpndSize_Null); + Constraint getConstraint(uint32 idx, uint32 memOpndMask, OpndSize size = OpndSize_Null) const; /** * Returns true if the position at idx starts or extends the operand live range Index: jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp (working copy) @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ /** * @author Sergey L. Ivashin * @version $Revision$ @@ -14,25 +30,19 @@ #include #include -#ifdef _DEBUG__REGALLOC3 -#include "Ia32RegAllocWrite.h" +#ifdef _DEBUG_REGALLOC3 #ifdef _MSC_VER #pragma warning(disable : 4505) //unreferenced local function has been removed #endif //#ifdef _MSC_VER -#endif //#ifdef _DEBUG__REGALLOC3 +#endif //#ifdef _DEBUG_REGALLOC3 -// -// Flags to tune -// -#define _VAR2_ -#define _REGALLOC3_NEIGBH -//#define _REGALLOC3_COALESCE +using namespace std; +#ifdef _EM64T_ +#define _SKIP_CATCHED +#endif - -using namespace std; - namespace Jitrino { @@ -64,8 +74,12 @@ { MemoryManager mm; // this is private MemoryManager, not irm.getMemoryManager() - int coalesceCount; + unsigned flag_SORT; + bool flag_NEIGBH; + bool flag_COALESCE; + unsigned coalesceCount; + class BoolMatrix; typedef uint32 RegMask; // used to represent set of registers @@ -99,9 +113,10 @@ }; typedef StlVector Oproles; + typedef StlList Indexes; + struct Opndx { - typedef StlList Indexes; Indexes* adjacents, * hiddens; @@ -112,27 +127,27 @@ int ridx; // index in Registers of register assigned/will be assigned RegMask alloc, // 0 or mask of the register assigned - avail; // if not assigned, then mask of the registers available - int nbavails; // number of the registers available for this operand - int spillcost; - bool spilled; + avail; // if not assigned, then mask of the registers available (defined by calculated constraint) + unsigned nbavails; // number of the registers available for this operand ( =bitCount(avail) ) + double spillcost; + bool spill; // operand selected for spilling + bool ignore; // this operand was coalesced so its entry in the graph is invalid }; // Operand's graph to be colored struct Graph : public StlVector { - Graph (MemoryManager& m) : StlVector(m), mm(m) {} + Graph (MemoryManager& m) : StlVector(m) {} void connect (int x1, int x2) const; int disconnect (int x) const; void reconnect (int x) const; - void moveNodes (Opndx::Indexes& from, Opndx::Indexes& to, int x) const; - - MemoryManager& mm; + void moveNodes (Indexes& from, Indexes& to, int x) const; }; Graph graph; - int graphsize; + unsigned graphsize, // total size of graph (operands + registers) + xregbase; // index of first register in graph StlVector nstack; @@ -143,14 +158,24 @@ uint32 getSideEffects () const {return coalesceCount == 0 ? 0 : SideEffect_InvalidatesLivenessInfo;} void runImpl(); - bool verify(bool force=false); + void SpillGen (); + bool verify (bool force=false); bool buildGraph (); + void processInst (Inst*, BitSet&, int* opandmap, BoolMatrix&, double excount); + void showGraph (); + void lookLives (Opnd*, BitSet&, int* opandmap, BoolMatrix&); + int findNode (Opnd*) const; bool coalescing (int* opandmap, BoolMatrix& matrix); - void coalesce (int* opandmap, BoolMatrix& matrix, int, int); + void coalesce (int* opandmap, BoolMatrix& matrix, int x0, int x1); + int duplicates (Indexes* list, BoolMatrix& matrix, int x0, int x1); void pruneGraph (); - void assignRegs (); - bool assignReg (int); + bool shouldPrune (const Opndx&) const; + bool assignRegs (); + bool assignReg (Opndx&); + void spillRegs (); + int spillReg (Opndx&); + int update (const Inst*, const Opnd*, Constraint&) const; }; @@ -170,13 +195,8 @@ using std::endl; using std::ostream; -#ifdef _DEBUG__REGALLOC3 +#ifdef _DEBUG_REGALLOC3 -#include "Ia32RegAllocWrite.h" - -static void onConstruct (const IRManager&); -static void onDestruct (); - struct Sep { Sep () :first(true) {} @@ -209,16 +229,8 @@ static ostream& operator << (ostream&, const RegAlloc3::Graph&); -struct Dbgout : public ::std::ofstream -{ - Dbgout (const char* s) {open(s);} - ~Dbgout () {close();} -}; +#define DBGOUT(s) log(LogStream::DBG).out() << s -static Dbgout dbgout("RegAlloc3.txt"); - -#define DBGOUT(s) dbgout << s - #else #define DBGOUT(s) @@ -231,9 +243,9 @@ //======================================================================================== -static size_t bitCount (RegAlloc3::RegMask mk) +static int bitCount (RegAlloc3::RegMask mk) { - size_t count = 0; + int count = 0; while (mk != 0) { if ((mk & 1) != 0) @@ -244,11 +256,11 @@ } -static size_t bitNumber (RegAlloc3::RegMask mk) +static int bitNumber (RegAlloc3::RegMask mk) { assert(mk != 0); - size_t number = 0; + int number = 0; while (mk != 1) { ++number; @@ -299,6 +311,7 @@ bool isw; }; + //======================================================================================== // BoolMatrix - Symmetric boolean matrix //======================================================================================== @@ -357,6 +370,20 @@ { if (params == 0 || strcmp(params, "ALL") == 0) { +#ifdef _EM64T_ + push_back(Constraint(RegName_RAX) + |Constraint(RegName_RCX) + |Constraint(RegName_RDX) + |Constraint(RegName_RBX) + |Constraint(RegName_RSI) + |Constraint(RegName_RDI) + |Constraint(RegName_RBP) + |Constraint(RegName_R8) + |Constraint(RegName_R9) + |Constraint(RegName_R10) + |Constraint(RegName_R11) + |Constraint(RegName_R12)); +#else push_back(Constraint(RegName_EAX) |Constraint(RegName_ECX) |Constraint(RegName_EDX) @@ -364,16 +391,15 @@ |Constraint(RegName_ESI) |Constraint(RegName_EDI) |Constraint(RegName_EBP)); - - push_back(Constraint(RegName_XMM1) +#endif + push_back(Constraint(RegName_XMM0) + |Constraint(RegName_XMM1) |Constraint(RegName_XMM2) |Constraint(RegName_XMM3) |Constraint(RegName_XMM4) |Constraint(RegName_XMM5) |Constraint(RegName_XMM6) |Constraint(RegName_XMM7)); - - push_back(Constraint(RegName_FP0)); } else { @@ -391,10 +417,10 @@ assert(!empty()); - for (size_t i = 0; i != IRMaxRegKinds; ++i) + for (unsigned i = 0; i != IRMaxRegKinds; ++i) indexes[i] = -1; - for (size_t i = 0; i != size(); ++i) + for (unsigned i = 0; i != size(); ++i) indexes[operator[](i).getKind()] = (int)i; } @@ -403,7 +429,7 @@ { if (c.getMask() != 0) { - for (size_t i = 0; i != size(); ++i) + for (unsigned i = 0; i != size(); ++i) { Constraint& r = operator[](i); if (r.getKind() == c.getKind()) @@ -448,7 +474,7 @@ int disc = 0; - for (Opndx::Indexes::iterator k = opndx.adjacents->begin(); k != opndx.adjacents->end(); ++k) + for (Indexes::iterator k = opndx.adjacents->begin(); k != opndx.adjacents->end(); ++k) { // this node is adjacent to the node to be disconnected const Opndx& adjopndx = at(*k); @@ -471,7 +497,7 @@ // Node to be reconnected const Opndx& opndx = at(x); - for (Opndx::Indexes::iterator k = opndx.hiddens->begin(); k != opndx.hiddens->end(); ++k) + for (Indexes::iterator k = opndx.hiddens->begin(); k != opndx.hiddens->end(); ++k) { // this node was adjacent to the node to be reconnected const Opndx& adjopndx = at(*k); @@ -482,9 +508,9 @@ } -void RegAlloc3::Graph::moveNodes (Opndx::Indexes& from, Opndx::Indexes& to, int x) const +void RegAlloc3::Graph::moveNodes (Indexes& from, Indexes& to, int x) const { - Opndx::Indexes::iterator i; + Indexes::iterator i; while ((i = find(from.begin(), from.end(), x)) != from.end()) to.splice(to.begin(), from, i); } @@ -497,65 +523,41 @@ void RegAlloc3::runImpl () { -#ifdef _DEBUG__REGALLOC3 - onConstruct(*irManager); -#endif + getIRManager().fixEdgeProfile(); - irManager->fixEdgeProfile(); - registers.parse(getArg("regs")); DBGOUT("parameters: " << registers << endl;) + getArg("SORT", flag_SORT = 2); + getArg("NEIGBH", flag_NEIGBH = false); + getArg("COALESCE", flag_COALESCE = true); + coalesceCount = 0; + DBGOUT(endl << "passnb 1" << endl;) if (buildGraph()) { - -#ifdef _DEBUG__REGALLOC3 - dbgout << "--- graph" << endl; - for (int x = 0; x != graphsize; ++x) + pruneGraph(); + if (!assignRegs()) { - const Opndx& opndx = graph.at(x); - dbgout << "(" << x << ") " << opndx.opnd->getId() - << " " << *opndx.opnd << " ridx:" << opndx.ridx - << " avail:" << hex << opndx.avail << dec << " nbavails:" << opndx.nbavails - << " spillcost:" << opndx.spillcost; - - if (!opndx.adjacents->empty()) + bool flag_SPILL; + getArg("SPILL", flag_SPILL = false); + if (flag_SPILL) { - Sep s; - dbgout << " adjacents{"; - for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) - dbgout << s << *i; - dbgout << "}"; - } + spillRegs(); + getIRManager().calculateLivenessInfo(); - if (opndx.neighbs != 0) - { - Sep s; - dbgout << " neighbs{"; - for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i) - dbgout << s << *i; - dbgout << "}"; + DBGOUT(endl << "passnb 2" << endl;) + buildGraph(); + pruneGraph(); + assignRegs(); } - - dbgout << endl; } - dbgout << "---------" << endl; -#endif - - pruneGraph(); - assignRegs(); } count_coalesced += coalesceCount; - -#ifdef _DEBUG__REGALLOC3 - RegAllocWrite raw(*irManager, "regalloc3.xml"); - raw.write(); - - onDestruct(); -#endif + + SpillGen(); } @@ -575,48 +577,69 @@ } +void RegAlloc3::SpillGen () +{ +/*** + bool runSpillGen = false; + + for (unsigned i = 0, opandcount = getIRManager().getOpndCount(); i != opandcount; ++i) + { + Opnd* opnd = getIRManager().getOpnd(i); + if (opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default).isNull()) + if (opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default).getKind() == OpndKind_Memory) + { + opnd->assignMemLocation(MemOpndKind_StackAutoLayout, getIRManager().getRegOpnd(STACK_REG), 0); + DBGOUT("assigned to mem " << *opnd << endl;) + } + else + runSpillGen = true; + } + + bool* spill_flag = new (getIRManager().getMemoryManager()) bool(runSpillGen); + getIRManager().setInfo("SpillGen", spill_flag); + DBGOUT("runSpillGen:" << runSpillGen << endl;) +***/ +} + + bool RegAlloc3::buildGraph () { - size_t opandcount = getIRManager().getOpndCount(); + static CountTime buildGraphTimer("ia32::RegAlloc3::buildGraph"); + AutoTimer tm(buildGraphTimer); + + const unsigned opandcount = getIRManager().getOpndCount(); + graph.resize(0); graph.reserve(opandcount); -// First, scan all the operands available and see if operand is already assigned + Opndx opndx; + +// Scan all the operands available and see if operand is already assigned // or need to be assigned - Opndx opndx; int* opandmap = new (mm) int[opandcount]; - for (size_t i = 0; i != opandcount; ++i) + for (unsigned i = 0; i != opandcount; ++i) { - Opnd* opnd = getIRManager().getOpnd((uint32)i); int mapto = -1; + Opnd* opnd = getIRManager().getOpnd(i); + opndx.opnd = opnd; + opndx.spill = false; + opndx.ignore = false; + int ridx; Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default); - if (!loc.isNull()) - {// this operand is already assigned to some location/register - if ((ridx = registers.index(loc)) != -1) - { - opndx.opnd = opnd; - opndx.ridx = ridx; - opndx.alloc = loc.getMask(); - mapto = (int)graph.size(); - graph.push_back(opndx); - } - } - else + if (loc.isNull()) {// this operand is not allocated yet loc = opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default); if ((ridx = registers.index(loc)) != -1) {// operand should be assigned to register - opndx.opnd = opnd; opndx.ridx = ridx; opndx.alloc = 0; opndx.avail = loc.getMask() & registers[ridx].getMask(); - opndx.nbavails = (int)bitCount(opndx.avail); + opndx.nbavails = bitCount(opndx.avail); assert(opndx.nbavails != 0); opndx.spillcost = 1; - opndx.spilled = false; mapto = (int)graph.size(); graph.push_back(opndx); } @@ -625,26 +648,50 @@ opandmap[i] = mapto; } - if ((graphsize = (int)graph.size()) == 0) + if ((graphsize = graph.size()) == 0) return false; +// Create graph node for each register available + + xregbase = graph.size(); // graph index of the first register available + graph.reserve(graph.size() + registers.size()); + + opndx.opnd = 0; + opndx.spill = false; + opndx.ignore = false; + opndx.ridx = 0; + for (Registers::iterator it = registers.begin(), end = registers.end(); it != end; ++it) + { + for (RegMask msk = it->getMask(), mk = 1; msk != 0; mk <<= 1) + if ((msk & mk) != 0) + { + msk ^= mk; + opndx.alloc = mk; + graph.push_back(opndx); + } + + ++opndx.ridx; + } + + graphsize = graph.size(); + BoolMatrix matrix(mm, graphsize); + for (Graph::iterator i = graph.begin(); i != graph.end(); ++i) { - i->adjacents = new (mm) Opndx::Indexes(mm); - i->hiddens = new (mm) Opndx::Indexes(mm); + i->adjacents = new (mm) Indexes(mm); + i->hiddens = new (mm) Indexes(mm); i->oproles = new (mm) Oproles(mm); i->neighbs = 0; } -// Second, iterate over all instructions in CFG and calculate which operands +// Iterate over all instructions in CFG and calculate which operands // are live simultaneouesly (result stored in matrix) - BoolMatrix matrix(mm, graphsize); + BitSet lives(mm, opandcount); - BitSet lives(mm, (uint32)opandcount); - const Nodes& nodes = irManager->getFlowGraph()->getNodesPostOrder(); - for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) { + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) + { Node* node = *it; if (node->isBlockNode()) { @@ -652,11 +699,8 @@ if (inst == 0) continue; - int excount = static_cast(node->getExecCount()); - if (excount < 1) - excount = 1; - excount *= 100; - //int excount = 1; + double excount = node->getExecCount() / irManager->getFlowGraph()->getEntryNode()->getExecCount(); + assert(excount > 0); // start with the operands at the block bottom getIRManager().getLiveAtExit(node, lives); @@ -664,59 +708,8 @@ // iterate over instructions towards the top of the block for (;;) { - int i, x; -#ifdef _OLD_ - Inst::Opnds defs(inst, Inst::OpndRole_AllDefs); - for (Inst::Opnds::iterator it = defs.begin(); it != defs.end(); it = defs.next(it)) - if ((x = opandmap[i = inst->getOpnd(it)->getId()]) != -1) - { - BitSet::IterB bsk(lives); - int k, y; - for (k = bsk.getNext(); k != -1; k = bsk.getNext()) - if (k != i && (y = opandmap[k]) != -1) - matrix.set(x, y); - } -#else - int defx = -1; - Oprole oprole; - const uint32* oproles = const_cast(inst)->getOpndRoles(); - Inst::Opnds opnds(inst, Inst::OpndRole_All); - for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) - if ((x = opandmap[i = inst->getOpnd(it)->getId()]) != -1) - { - Opndx& opndx = graph.at(x); + processInst(inst, lives, opandmap, matrix, excount); - oprole.inst = inst; - oprole.role = oproles[it]; - opndx.oproles->push_back(oprole); - - if (oprole.role & Inst::OpndRole_Def) - { - defx = x; - BitSet::IterB bsk(lives); - int k, y; - for (k = bsk.getNext(); k != -1; k = bsk.getNext()) - if (k != i && (y = opandmap[k]) != -1) - matrix.set(x, y); - } -#ifdef _REGALLOC3_NEIGBH - else if (defx != -1 && !lives.getBit(opndx.opnd->getId())) - { - Opndx& defopndx = graph.at(defx); - if (defopndx.neighbs == 0) - defopndx.neighbs = new (mm) Opndx::Indexes(mm); - defopndx.neighbs->push_back(x); - - if (opndx.neighbs ==0) - opndx.neighbs = new (mm) Opndx::Indexes(mm); - opndx.neighbs->push_back(defx); - } -#endif - - opndx.spillcost += excount; - } -#endif - if (inst->getPrevInst() == 0) break; @@ -724,35 +717,198 @@ inst = inst->getPrevInst(); } } +#ifdef _SKIP_CATCHED + else if (node->isDispatchNode()) + { + BitSet* tmp = irManager->getLiveAtEntry(node); + BitSet::IterB ib(*tmp); + int i; + for (int x = ib.getNext(); x != -1; x = ib.getNext()) + if ((i = opandmap[x]) != -1) + { + Opndx& opndx = graph.at(i); + opndx.ignore = true; + DBGOUT("catched " << opndx << endl;) + } + } +#endif } -// Do iterative coalescing +// Detect and ignore not-used operands (e.g. child of coalesced operands) -#ifdef _REGALLOC3_COALESCE - while (coalescing(opandmap, matrix)) - /*nothing*/; -#endif + for (unsigned x = 0; x < xregbase; ++x) + if (graph[x].oproles->empty()) + graph[x].ignore = true; -// Third, connect nodes that represent simultaneouesly live operands +// Connect nodes that represent simultaneouesly live operands - for (int x1 = 1; x1 < graphsize; ++x1) - for (int x2 = 0; x2 < x1; ++x2) - if (matrix.test(x1, x2) && graph.at(x1).ridx == graph.at(x2).ridx) + for (unsigned x1 = 1; x1 < graphsize; ++x1) + for (unsigned x2 = 0; x2 < x1; ++x2) + if (matrix.test(x1, x2)) graph.connect(x1, x2); + showGraph(); + +// Do iterative coalescing + + if (flag_COALESCE) + while (coalescing(opandmap, matrix)) + /*nothing*/; + + showGraph(); return true; } +void RegAlloc3::processInst (Inst* inst, BitSet& lives, int* opandmap, BoolMatrix& matrix, double excount) +{ + int defx = -1; + Oprole oprole; + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) + { + uint32 role = inst->getOpndRoles(it); + Opnd* opnd = inst->getOpnd(it); + + // For each operand def, look at the all live operand + if (role & Inst::OpndRole_Def) + lookLives(opnd, lives, opandmap, matrix); + + int i = opnd->getId(); + int x = opandmap[i]; + if (x != -1) + { + Opndx& opndx = graph.at(x); + + if (opndx.oproles->empty() || opndx.oproles->back().inst != inst) + { + oprole.inst = inst; + oprole.role = role; + opndx.oproles->push_back(oprole); + } + else + opndx.oproles->back().role |= role; + + if (role & Inst::OpndRole_Def) + { + defx = x; + } + else if (flag_NEIGBH && defx != -1 && !lives.getBit(i)) + { + Opndx& defopndx = graph.at(defx); + if (defopndx.neighbs == 0) + defopndx.neighbs = new (mm) Indexes(mm); + defopndx.neighbs->push_back(x); + + if (opndx.neighbs == 0) + opndx.neighbs = new (mm) Indexes(mm); + opndx.neighbs->push_back(defx); + } + + opndx.spillcost += excount; + } + } +} + + +// Look for operands live at defining point +// +void RegAlloc3::lookLives (Opnd* opnd, BitSet& lives, int* opandmap, BoolMatrix& matrix) +{ + int i = opnd->getId(); + int x; + if ((x = opandmap[i]) == -1 && (x = findNode(opnd)) == -1) + return; + + BitSet::IterB bsk(lives); + int k, y; + for (k = bsk.getNext(); k != -1; k = bsk.getNext()) + if (k != i) + if ((y = opandmap[k]) != -1 || (y = findNode(irManager->getOpnd(k))) != -1) + matrix.set(x, y); +} + + +int RegAlloc3::findNode (Opnd* opnd) const +{ + Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location); + if (!loc.isNull()) + { + int ridx = registers.index(loc); + RegMask msk = loc.getMask(); + + for (unsigned x = xregbase; x != graph.size(); ++x) + { + const Opndx& opndx = graph[x]; + assert(opndx.alloc != 0); + if (opndx.ridx == ridx && opndx.alloc == msk) + return x; + } + } + + return -1; +} + + +void RegAlloc3::showGraph () +{ +#ifdef _DEBUG_REGALLOC3 + log(LogStream::DBG) << "--- graph" << endl; + for (unsigned x = 0; x != graphsize; ++x) + { + const Opndx& opndx = graph.at(x); + log(LogStream::DBG).out() + << "(" << x << ") " + << (opndx.ignore ? "IGNORE " : ""); + + if (opndx.opnd != 0) + log(LogStream::DBG).out() << *opndx.opnd; + else + log(LogStream::DBG).out() << "REG"; + + log(LogStream::DBG).out() + << " ridx:" << opndx.ridx + << " avail:" << hex << opndx.avail << " alloc:" << opndx.alloc << dec + //<< " nbavails:" << opndx.nbavails + << " spillcost:" << opndx.spillcost; + + if (!opndx.adjacents->empty()) + { + Sep s; + log(LogStream::DBG) << " adjacents{"; + for (RegAlloc3::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) + log(LogStream::DBG).out() << s << *i; + log(LogStream::DBG) << "}"; + } + + if (opndx.neighbs != 0) + { + Sep s; + log(LogStream::DBG) << " neighbs{"; + for (RegAlloc3::Indexes::const_iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i) + log(LogStream::DBG).out() << s << *i; + log(LogStream::DBG) << "}"; + } + + log(LogStream::DBG) << endl; + } + log(LogStream::DBG) << "---------" << endl; +#endif +} + + bool RegAlloc3::coalescing (int* opandmap, BoolMatrix& matrix) { + //static CountTime coalescingTimer("ia32::RegAlloc3::coalescing"); + //AutoTimer tm(coalescingTimer); + int x0, x1; const Nodes& nodes = irManager->getFlowGraph()->getNodesPostOrder(); - for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) { + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) + { Node* node = *it; if (node->isBlockNode()) - { for (const Inst* inst = (Inst*)node->getLastInst(); inst != 0; inst = inst->getPrevInst()) if (inst->getMnemonic() == Mnemonic_MOV) if ((x0 = opandmap[inst->getOpnd(0)->getId()]) != -1 && @@ -762,13 +918,58 @@ Opndx& opndx0 = graph.at(x0), & opndx1 = graph.at(x1); - if (opndx0.ridx == opndx1.ridx && opndx0.avail == opndx1.avail) + RegMask avail = opndx0.avail & opndx1.avail; + unsigned nbavails = bitCount(avail); + + if (opndx0.ridx != opndx1.ridx || nbavails == 0) + continue; + + //if (opndx0.opnd->getSize() != opndx1.opnd->getSize()) + // continue; + + Type* t0 = opndx0.opnd->getType(), + * t1 = opndx1.opnd->getType(); + + //if ((t0->isManagedPtr() || t0->isObject()) != (t1->isManagedPtr() || t1->isObject())) + // continue; + + if (!Type::mayAlias(&irManager->getTypeManager(), t0, t1)) + continue; + + DBGOUT("coalesce candidates (" << x0 << ") & (" << x1 << ") " << *inst << endl;) + + unsigned xdegree = 0, // estimated degree of the coalesced node + xcount = 0; // number of neighbours with degree >= k + + xdegree = opndx0.adjacents->size() + opndx1.adjacents->size() + - duplicates(opndx1.adjacents, matrix, x0, x1); + + for (Indexes::iterator ptr = opndx0.adjacents->begin(), end = opndx0.adjacents->end(); ptr != end; ++ptr) { - coalesce (opandmap, matrix, x0, x1); - return true; + Indexes* ixs = graph.at(*ptr).adjacents; + unsigned ndegree = ixs->size() - duplicates(ixs, matrix, x0, x1); + if (ndegree >= nbavails) + if (++xcount >= nbavails) + break; } + for (Indexes::iterator ptr = opndx1.adjacents->begin(), end = opndx1.adjacents->end(); ptr != end; ++ptr) + if (!matrix.test(*ptr, x0)) + { + Indexes* ixs = graph.at(*ptr).adjacents; + unsigned ndegree = ixs->size(); + if (ndegree >= nbavails) + if (++xcount >= nbavails) + break; + } + + DBGOUT("xdegree:" << xdegree << " xcount:" << xcount << endl;) + + if (xcount >= nbavails || xdegree >= nbavails) + continue; + + coalesce (opandmap, matrix, x0, x1); + return true; } - } } return false; @@ -777,7 +978,7 @@ // Colalesce graph nodes (x0) and (x1) and the corresponding operands. // Node (x1) not to be used anymore, (x0) must be used unstead. -// Note that (x1) remains in the graph +// Note that (x1) remains in the graph (must be ignored) // void RegAlloc3::coalesce (int* opandmap, BoolMatrix& matrix, int x0, int x1) { @@ -786,8 +987,11 @@ Opndx& opndx0 = graph.at(x0), & opndx1 = graph.at(x1); - opndx1.spilled = true; + opndx0.avail &= opndx1.avail; + opndx0.nbavails = bitCount(opndx0.avail); + opndx1.ignore = true; + Opnd* opnd0 = opndx0.opnd, * opnd1 = opndx1.opnd; @@ -802,137 +1006,168 @@ opndx0.oproles->insert(opndx0.oproles->end(), opndx1.oproles->begin(), opndx1.oproles->end()); opndx1.oproles->clear(); - for (int x = 0; x < graphsize; ++x) - if (x != x1 && matrix.test(x, x1)) - { + Indexes tmp(mm); // list of trash indexes + + for (Indexes::iterator ptr = opndx1.adjacents->begin(), end = opndx1.adjacents->end(); ptr != end;) + { + Indexes::iterator ptr_next = ptr; + ++ptr_next; + + int x = *ptr; + assert(matrix.test(x, x1)); + matrix.clear(x, x1); + + Opndx& opndx = graph.at(x); + Indexes::iterator ptrx = find(opndx.adjacents->begin(), opndx.adjacents->end(), x1); + assert(ptrx != opndx.adjacents->end()); + + if (matrix.test(x, x0)) + {// disconnect (x1 - x) + tmp.splice(tmp.end(), *opndx1.adjacents, ptr); + tmp.splice(tmp.end(), *opndx.adjacents, ptrx); + } + else + {// connect (x0 - x) matrix.set(x, x0); - matrix.clear(x, x1); + opndx0.adjacents->splice(opndx0.adjacents->end(), *opndx1.adjacents, ptr); // x0 -> x + *ptrx = x0; // x -> x0 } + ptr = ptr_next; + } + + assert(opndx1.adjacents->empty()); + ++coalesceCount; } -#ifndef _VAR0 +int RegAlloc3::duplicates (RegAlloc3::Indexes* list, RegAlloc3::BoolMatrix& matrix, int x0, int x1) +{ + int count = 0; + for (RegAlloc3::Indexes::iterator ptr = list->begin(), end = list->end(); ptr != end; ++ptr) + if (*ptr != x0 && *ptr != x1) + if (matrix.test(*ptr, x0) && matrix.test(*ptr, x1)) + ++count; + + return count; +} + + struct sortRule1 { const RegAlloc3::Graph& graph; + const unsigned int rule; + sortRule1 (const RegAlloc3::Graph& g, unsigned int r) :graph(g), rule(r) {} - sortRule1 (const RegAlloc3::Graph& g) :graph(g) {} - - bool operator () (int x1, int x2) { const RegAlloc3::Opndx& opndx1 = graph.at(x1), & opndx2 = graph.at(x2); -#ifdef _VAR1_ - return opndx1.spillcost > opndx2.spillcost; -#endif -#ifdef _VAR2_ - return opndx1.spillcost < opndx2.spillcost; -#endif - + return rule == 1 ? opndx1.spillcost > opndx2.spillcost + : opndx1.spillcost < opndx2.spillcost; } }; -#endif - void RegAlloc3::pruneGraph () { - DBGOUT(endl << "pruneGraph - start"<< endl;) + static CountTime pruneGraphTimer("ia32::RegAlloc3::pruneGraph"); + AutoTimer tm(pruneGraphTimer); - size_t nbnodes = 0; - for (int i = 0; i != graphsize; ++i) - if (!graph.at(i).adjacents->empty()) + DBGOUT(endl << "pruneGraph"<< endl;) + +// Calculate number of nodes that should be pruned off the graph + int nbnodes = 0; + for (unsigned i = 0; i != graphsize; ++i) + if (shouldPrune(graph.at(i))) nbnodes++; StlVector tmp(mm); nstack.reserve(nbnodes); - while (nbnodes != 0) + while (nbnodes > 0) { // Apply degree < R rule -#ifdef _VAR0_ + if (flag_SORT == 0) - for (bool succ = false; !succ;) - { - succ = true; - for (int i = 0; i != graphsize; ++i) + for (bool succ = false; !succ;) { - Opndx& opndx = graph.at(i); - const int n = (int)opndx.adjacents->size(); - if (n != 0 && n < opndx.nbavails) + succ = true; + for (unsigned i = 0; i != graphsize; ++i) { - nbnodes -= graph.disconnect(i); - nstack.push_back(i); - succ = false; - - DBGOUT(" rule#1 (" << i << ")" << endl;) + Opndx& opndx = graph.at(i); + if (shouldPrune(opndx)) + { + const unsigned n = opndx.adjacents->size(); + if (n != 0 && n < opndx.nbavails) + { + nbnodes -= graph.disconnect(i); + nstack.push_back(i); + succ = false; + //DBGOUT(" rule#1 (" << i << ")" << endl;) + } + } } } - } + else -#else + for (bool succ = false; !succ;) + { + succ = true; - for (bool succ = false; !succ;) - { - succ = true; + tmp.resize(0); - tmp.resize(0); + for (unsigned i = 0; i != graphsize; ++i) + { + Opndx& opndx = graph.at(i); + if (shouldPrune(opndx)) + { + const unsigned n = opndx.adjacents->size(); + if (n != 0 && n < opndx.nbavails) + tmp.push_back(i); + } + } - for (int i = 0; i != graphsize; ++i) - { - Opndx& opndx = graph.at(i); - const int n = (int)opndx.adjacents->size(); - if (n != 0 && n < opndx.nbavails) - tmp.push_back(i); - } + if (tmp.size() != 0) + { + if (tmp.size() > 1) + sort(tmp.begin(), tmp.end(), sortRule1(graph, flag_SORT)); - if (tmp.size() != 0) - { - DBGOUT("buck size:" << tmp.size() << endl;) - if (tmp.size() > 1) - sort(tmp.begin(), tmp.end(), sortRule1(graph)); + for (StlVector::iterator it = tmp.begin(); it != tmp.end(); ++it) + { + nbnodes -= graph.disconnect(*it); + nstack.push_back(*it); + } - for (StlVector::iterator it = tmp.begin(); it != tmp.end(); ++it) - { - nbnodes -= graph.disconnect(*it); - nstack.push_back(*it); - - DBGOUT(" rule#1 (" << *it << ") cost:" << graph.at(*it).spillcost << endl;) + succ = false; } - - succ = false; } - } -#endif - // Apply degree >= R rule - if (nbnodes != 0) + if (nbnodes > 0) { int x = -1, n; double cost = 0, w; // Find some node to disconnect - for (int i = 0; i != graphsize; ++i) + for (unsigned i = 0; i != graphsize; ++i) { Opndx& opndx = graph.at(i); - if ((n = (int)opndx.adjacents->size()) != 0) - { - w = (double)opndx.spillcost/(double)n; - DBGOUT(" (" << i << ") cost:" << w << endl;) - if (x == -1 || w < cost) - cost = w, - x = i; - } + if (shouldPrune(opndx)) + if ((n = (int)opndx.adjacents->size()) != 0) + { + w = opndx.spillcost/(double)n; + if (x == -1 || w < cost) + cost = w, + x = i; + } } assert(x != -1); @@ -940,18 +1175,27 @@ { nbnodes -= graph.disconnect(x); nstack.push_back(x); - - DBGOUT(" rule#2 (" << x << ") cost:" << cost << endl;) } } } +} - DBGOUT("pruneGraph - stop"<< endl << graph;) + +bool RegAlloc3::shouldPrune (const Opndx& opndx) const +{ + return !opndx.ignore && opndx.alloc == 0 && !opndx.adjacents->empty(); } -void RegAlloc3::assignRegs () +bool RegAlloc3::assignRegs () { + static CountTime assignRegsTimer("ia32::RegAlloc3::assignRegs"); + AutoTimer tm(assignRegsTimer); + + DBGOUT("assignRegs" << endl;) + + int spilled = 0; + while (!nstack.empty()) { int x = nstack.back(); @@ -959,30 +1203,47 @@ Opndx& opndx = graph.at(x); graph.reconnect(x); - opndx.spilled = !assignReg(x); + if (opndx.alloc == 0) + { + DBGOUT("(" << x << ")" << endl;) + opndx.spill = !assignReg(opndx); + } } - for (int x = 0; x != graphsize; ++x) + for (unsigned x = 0; x != graphsize; ++x) { Opndx& opndx = graph.at(x); - if (opndx.alloc == 0 && !opndx.spilled ) - assignReg(x); + if (opndx.alloc == 0 && !opndx.spill && !opndx.ignore) + { + DBGOUT("(" << x << ")" << endl;) + opndx.spill = !assignReg(opndx); + } + + if (opndx.spill) + ++spilled; } + + DBGOUT("spilled " << spilled << " operands" << endl;) + + return spilled == 0; } -bool RegAlloc3::assignReg (int x) +bool RegAlloc3::assignReg (Opndx& opndx) { - Opndx& opndx = graph.at(x); RegMask alloc = 0; - for (Opndx::Indexes::iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) - alloc |= graph.at(*i).alloc; + assert(!opndx.ignore); + for (Indexes::iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) + { + Opndx& opndz = graph.at(*i); + if (opndz.ridx == opndx.ridx) + alloc |= opndz.alloc; + } if ((alloc = opndx.avail & ~alloc) == 0) { - ++count_spilled; - DBGOUT("spilled (" << x << ")" << endl;) + DBGOUT(" assign " << *opndx.opnd << " failed" << endl;) return false; } else @@ -990,7 +1251,7 @@ if (opndx.neighbs != 0) { RegMask neighbs = 0; - for (Opndx::Indexes::iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i) + for (Indexes::iterator i = opndx.neighbs->begin(); i != opndx.neighbs->end(); ++i) { Opndx& neigbx = graph.at(*i); if (neigbx.ridx == opndx.ridx) @@ -999,7 +1260,7 @@ if ((neighbs & alloc) != 0 && neighbs != alloc) { - DBGOUT("! alloc:" << std::hex << alloc << " * neighbs:" << neighbs << " =" << (alloc & neighbs) << std::dec << endl); + DBGOUT(" !alloc:" << std::hex << alloc << " * neighbs:" << neighbs << " =" << (alloc & neighbs) << std::dec << endl); alloc &= neighbs; } } @@ -1007,49 +1268,117 @@ opndx.alloc = findHighest(alloc); opndx.opnd->assignRegName(getRegName((OpndKind)registers[opndx.ridx].getKind(), opndx.opnd->getSize(), - (int)bitNumber(opndx.alloc))); + bitNumber(opndx.alloc))); ++count_assigned; - DBGOUT("assigned (" << x << ") = <" << getRegNameString(opndx.opnd->getRegName()) << ">" << endl;) + DBGOUT(" assigned " << *opndx.opnd << endl;) return true; } } -//======================================================================================== -// Internal debug helpers -//======================================================================================== +void RegAlloc3::spillRegs () +{ + DBGOUT("spillRegs" << endl;) + int inserted = 0; -#ifdef _DEBUG__REGALLOC3 + for (unsigned x = 0; x != graphsize; ++x) + { + Opndx& opndx = graph.at(x); + if (opndx.spill) + inserted += spillReg(opndx); + } -static void onConstruct (const IRManager& irm) + DBGOUT("inserted " << inserted << " operands" << endl;) +} + + +int RegAlloc3::spillReg (Opndx& opndx) { - MethodDesc& md = irm.getMethodDesc(); - const char * methodName = md.getName(); - const char * methodTypeName = md.getParentType()->getName(); - dbgout << endl << "Constructed " << methodTypeName << "." << methodName - << "(" << md.getSignatureString() << ")" << endl; + Opnd* opnd = opndx.opnd; + const Constraint initial = opnd->getConstraint(Opnd::ConstraintKind_Initial); -// sos = strcmp(methodTypeName, "spec/benchmarks/_213_javac/BatchEnvironment") == 0 && -// strcmp(methodName, "flushErrors") == 0; + if ((initial.getKind() & OpndKind_Memory) == 0) + { + DBGOUT(" spilling " << *opndx.opnd << " failed" << endl;) + return 0; + } + + DBGOUT(" spilling " << *opndx.opnd << endl;) + opnd->setCalculatedConstraint(initial); + opnd->assignMemLocation(MemOpndKind_StackAutoLayout, irManager->getRegOpnd(STACK_REG), 0); + + int inserted = 0; + + for (Oproles::iterator ptr = opndx.oproles->begin(), end = opndx.oproles->end(); ptr != end; ++ptr) + { + Opnd* opndnew = getIRManager().newOpnd(opnd->getType(), initial); + Inst* inst = ptr->inst; + Inst* instnew = 0; + bool replaced = false; + if (ptr->role & Inst::OpndRole_Use) + { + instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opndnew, opnd); + instnew->insertBefore(inst); + replaced = inst->replaceOpnd(opnd, opndnew); + assert(replaced); + DBGOUT(" before " << *inst << " inserted " << *instnew << " MOV " << *opndnew << ", " << *opnd << endl;) + } + + if (ptr->role & Inst::OpndRole_Def) + { + assert(!inst->hasKind(Inst::Kind_LocalControlTransferInst)); + if (!replaced) + replaced = inst->replaceOpnd(opnd, opndnew); + assert(replaced); + instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opnd, opndnew); + instnew->insertAfter(inst); + DBGOUT(" after " << *inst << " inserted " << *instnew << " MOV " << *opnd << ", " << *opndnew << endl;) + } + + Constraint c = initial; + update(instnew, opndnew, c); + update(inst, opndnew, c); + opndnew->setCalculatedConstraint(c); + + ++inserted; + } + + ++count_spilled; + return inserted; } -static void onDestruct () +// If currently handled operand is referenced by current instruction, then evaluate +// constraint of the operand imposed by this instruction and return 'true'. +// Otherwise, do nothing and return false. +// +int RegAlloc3::update (const Inst* inst, const Opnd* opnd, Constraint& constr) const { - dbgout << endl << "Destructed" << endl; + int count = 0; + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) + if ( inst->getOpnd(it) == opnd) + { + Constraint c = inst->getConstraint(it, 0, constr.getSize()); + if (constr.isNull()) + constr = c; + else + constr.intersectWith(c); + + count++; + } + return count; } -#endif //#ifdef _DEBUG__REGALLOC3 - //======================================================================================== // Output formatters //======================================================================================== -#ifdef _DEBUG__REGALLOC3 +#ifdef _DEBUG_REGALLOC3 static ostream& operator << (ostream& os, Sep& x) { @@ -1105,11 +1434,11 @@ { Sep s;; os << "{"; - for (size_t rk = 0; rk != registers.size(); ++rk) + for (unsigned rk = 0; rk != registers.size(); ++rk) { RegAlloc3::RegMask msk = x[rk]; - for (size_t rx = 0; msk != 0; ++rx, msk >>= 1) + for (unsigned rx = 0; msk != 0; ++rx, msk >>= 1) if ((msk & 1) != 0) { RegName reg = getRegName((OpndKind)registers[rk].getKind(), registers[rk].getSize(), rx); @@ -1130,7 +1459,7 @@ { Sep s; os << " adjacents{"; - for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) + for (RegAlloc3::Indexes::const_iterator i = opndx.adjacents->begin(); i != opndx.adjacents->end(); ++i) os << s << *i; os << "}"; } @@ -1139,7 +1468,7 @@ { Sep s; os << " hiddens{"; - for (RegAlloc3::Opndx::Indexes::const_iterator i = opndx.hiddens->begin(); i != opndx.hiddens->end(); ++i) + for (RegAlloc3::Indexes::const_iterator i = opndx.hiddens->begin(); i != opndx.hiddens->end(); ++i) os << s << *i; os << "}"; } @@ -1158,7 +1487,7 @@ } -#endif //#ifdef _DEBUG__REGALLOC3 +#endif //#ifdef _DEBUG_REGALLOC3 } //namespace Ia32 } //namespace Jitrino Index: jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp (working copy) @@ -193,7 +193,6 @@ bool evicts_known; - // Methods // ------- @@ -579,6 +578,13 @@ onConstruct(*this); #endif + const bool* spill_flag = static_cast(getIRManager().getInfo("SpillGen")); + if (spill_flag != 0 && !*spill_flag) + { + DBGOUT("SpillGen skipped" << endl;); + return; + } + registers.parse(getArg("regs")); DBGOUT("parameters: " << registers << endl;) irManager->resetOpndConstraints(); @@ -652,6 +658,7 @@ static Counter count_emits("ia32:spillgen:emits", 0); count_emits += emitted; + #ifdef _DEBUG_SPILLGEN log(LogStream::DBG) << endl << "Emitted movs :" << emitted << endl; @@ -869,7 +876,6 @@ assert(instxp == instxs.begin()); return actives.size(); - } @@ -896,11 +902,9 @@ lives_catch = 0; Node* node = bblock->getExceptionEdgeTarget(); - if (node!=NULL) { - if ((lives_catch = irManager->getLiveAtEntry(node))->isEmpty()) { + if (node != NULL) + if ((lives_catch = irManager->getLiveAtEntry(node))->isEmpty()) lives_catch = 0; - } - } for (Oplines::reverse_iterator it = actives.rbegin(); it != actives.rend(); ++it) { @@ -935,12 +939,13 @@ // Process instructions that are using the operand - while (opline.go()) { + while (opline.go()) if (opline.isProc()) { Constraint c(opline.opnd->getConstraint(Opnd::ConstraintKind_Initial)); update(opline.instx->inst, opline.opnd, c); opline.idx = registers.getIndex(c); + if (!tryRegister(opline, c, prefreg)) if (!tryMemory(opline, c)) if (!tryEvict(opline, c)) @@ -950,7 +955,10 @@ ++fails; if (simplify(opline.instx->inst, opline.opnd)) + { + loadOpndMem(opline); break; + } loadOpnd(opline, opline.instx, opline.opnd); @@ -966,11 +974,10 @@ { opline.forw(); } - } // End-block processing - assert(opline.instx == opline.ops->front().instx); + //assert(opline.instx == opline.ops->front().instx); if (opline.at_exit) loadOpndMem(opline); } @@ -1012,6 +1019,7 @@ Constraint cr((OpndKind)cx.getKind(), c.getSize(), cx.getMask()); // handle first instruction of the interval + RegMask mk = cr.getMask() & ~usedRegs(opline.instx, opline.idx, opline.isUse()); if (mk == 0) { @@ -1025,7 +1033,6 @@ // handle second and all others instructions - Instx* begx = opline.instx; Instx* endx = begx; int count = 0; @@ -1059,7 +1066,8 @@ mkpost = callRegs(opline.instx, opline.idx); count += cnt; } - DBGOUT(" -reg [" << (const Inst*)begx->inst << " - " << (const Inst*)endx->inst + + DBGOUT(" -reg [" << *begx->inst << " - " << *endx->inst << "] avail:" << RegMasks(cx, mk) << " count:" << count << endl;) if (count == 0) @@ -1073,6 +1081,7 @@ return true; } } + if ((mk & prefreg) != 0) { mk &= prefreg; @@ -1241,14 +1250,15 @@ bool SpillGen::simplify (Inst* inst, Opnd* opnd) { + Opnd* opndm; if (inst->hasKind(Inst::Kind_LocalControlTransferInst) && inst->getOpndCount() == 1) { - Opnd* opndm = inst->getOpnd(0); + opndm = inst->getOpnd(0); Opnd* opndx = irManager->newMemOpnd(opndm->getType(), MemOpndKind_StackAutoLayout, - irManager->getRegOpnd(RegName_ESP), + irManager->getRegOpnd(STACK_REG), 0); - DBGOUT("simplify " << *opndm << " -> mem " << *opndx << endl;); + DBGOUT("simplify jump " << *opndm << " -> mem " << *opndx << endl;); emitMove(true, inst,opndx, opndm); inst->replaceOpnd(opndm, opndx); @@ -1261,6 +1271,25 @@ return true; } + else if (inst->hasKind(Inst::Kind_CallInst) && (opndm = inst->getOpnd(0)) == opnd) + { + Opnd* opndx = irManager->newMemOpnd(opndm->getType(), + MemOpndKind_StackAutoLayout, + irManager->getRegOpnd(STACK_REG), + 0); + DBGOUT("simplify call " << *opndm << " -> mem " << *opndx << endl;); + emitMove(true, inst,opndx, opndm); + inst->replaceOpnd(opndm, opndx); + + for (int so = 0; so != MemOpndSubOpndKind_Count; ++so) + { + Opnd* sub = opndm->getMemOpndSubOpnd(static_cast(so)); + if (sub != 0) + actsmap[sub->getId()] = 0; + } + return true; + } + return false; } @@ -1384,6 +1413,7 @@ << " - " << *endx->inst << "] to " << *opline.opnd_mem << endl;) loadOpnd(opline, begx, opline.opnd_mem); + if (opline.opnd != opline.opnd_mem) for (; begx <= endx; ++begx) { Index: jitrino/src/codegenerator/ia32/Ia32WebMaker.cpp =================================================================== --- jitrino/src/codegenerator/ia32/Ia32WebMaker.cpp (revision 527094) +++ jitrino/src/codegenerator/ia32/Ia32WebMaker.cpp (working copy) @@ -20,18 +20,18 @@ */ #include "Ia32IRManager.h" +#include "Ia32ConstraintsResolver.h" #include "XTimer.h" #include "Counter.h" #include "Stl.h" -#ifdef _DEBUG__WEBMAKER +#ifdef _DEBUG_WEBMAKER #include #include -#include "Ia32IRXMLDump.h" #ifdef _MSC_VER #pragma warning(disable : 4505) //unreferenced local function has been removed #endif //#ifdef _MSC_VER -#endif //#ifdef _DEBUG__WEBMAKER +#endif //#ifdef _DEBUG_WEBMAKER using namespace std; @@ -51,7 +51,7 @@ { int globid; // -1 for local definition or uniquue id (starting from 0) Inst* defp; // defining instruction - size_t linkx; + unsigned linkx; bool visited; typedef StlVector Instps; @@ -79,10 +79,10 @@ struct Nodex { - Nodex (MemoryManager& mm, size_t s) :globentrys(mm,(uint32)s), - globdefsp (0), - globkillsp(0), - globexitsp(0) {} + Nodex (MemoryManager& mm, unsigned s) :globentrys(mm, s), + globdefsp (0), + globkillsp(0), + globexitsp(0) {} BitSet globentrys; // all global definitions available at the block entry BitSet* globdefsp, // can be 0 @@ -93,10 +93,10 @@ typedef StlVector Nodexs; Nodexs nodexs; - /*const*/ size_t opandcount; - /*const*/ size_t nodecount; - size_t splitcount; - size_t globcount; + /*const*/ unsigned opandcount; + /*const*/ unsigned nodecount; + unsigned splitcount; + unsigned globcount; WebMaker () :mm(1000, "WebMaker"), opndxs(mm), nodexs(mm) {} @@ -105,6 +105,7 @@ uint32 getSideEffects () const {return splitcount == 0 ? 0 : SideEffect_InvalidatesLivenessInfo;} void runImpl(); + void calculateConstraints(); void phase1(); void phase2(); @@ -118,7 +119,7 @@ static ActionFactory _webmaker("webmaker"); -static Counter count_splitted("ia32:webmaker:splitted", 0); +static Counter count_splitted("ia32:webmaker:splitted", 0); //======================================================================================== @@ -126,13 +127,10 @@ //======================================================================================== -#ifdef _DEBUG__WEBMAKER - using std::endl; using std::ostream; -static void onConstruct (const IRManager&); -static void onDestruct (); +#ifdef _DEBUG_WEBMAKER struct Sep { @@ -149,24 +147,162 @@ static ostream& operator << (ostream&, /*const*/ BitSet*); -struct Dbgout : public ::std::ofstream +#define DBGOUT(s) log(LogStream::DBG).out() << s + +#else + +#define DBGOUT(s) + +#endif + + +//======================================================================================== +// Inst::Opnds +//======================================================================================== + +class InstOpnds { - Dbgout (const char* s) {open(s);} - ~Dbgout () {close();} +public: + + InstOpnds (const Inst* inst, uint32 roles = Inst::OpndRole_All, bool forw = true); + + bool hasMore () const {return opnd != 0;} + void next () {move();} + Opnd* getOpnd () const {return opnd;} + uint32 getRole () const {return role;} + +protected: + + bool move (); + + const Inst* inst; + const uint32 roles; + const unsigned main_count; + + unsigned state, + main_idx, + sub_idx; + + uint32 role; + Opnd* opnd; + Opnd* main_opnd; }; -static Dbgout dbgout("WebMaker.txt"); -#define DBGOUT(s) dbgout << s -//#define DBGOUT(s) std::cerr << s +InstOpnds::InstOpnds (const Inst* i, uint32 r, bool forw) +:inst(i), roles(r), main_count(i->getOpndCount()) +{ + if (main_count == 0) + { + state = 6; + opnd = 0; + } + else if (forw) + { + main_idx = 0; + state = 0; + move(); + } + else + { + main_idx = main_count; + state = 3; + move(); + } +} -#else -#define DBGOUT(s) +bool InstOpnds::move () +{ + opnd = 0; -#endif + do + switch (state) + { + // forward iteration + case 0: // main operands + opnd = inst->getOpnd(main_idx); + role = inst->getOpndRoles(main_idx); + if (++main_idx == main_count) + { + main_idx = 0; + state = 1; + } + return true; + case 1: // find next memory operand + for (;; ++main_idx) + { + if (main_idx == main_count) + { + state = 6; + return false; + } + + main_opnd = inst->getOpnd(main_idx); + if (main_opnd->getMemOpndKind() != MemOpndKind_Null) + break; + } + + sub_idx = 0; + state = 2; + // fall to case 2 + + case 2: // sub operands + opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)sub_idx); + role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; + if (++sub_idx == 4) + { + ++main_idx; + state = 1; + } + break; + + // backward iteration + + case 3: // find prev memory operand + for (;;) + { + if (main_idx == 0) + { + main_idx = main_count; + state = 5; + goto S5; + } + + main_opnd = inst->getOpnd(--main_idx); + if (main_opnd->getMemOpndKind() != MemOpndKind_Null) + break; + } + + sub_idx = 4; + state = 4; + // fall to case 4 + + case 4: // sub operands + opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)--sub_idx); + role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; + if (sub_idx == 0) + state = 3; + break; + + case 5: // main operands +S5: opnd = inst->getOpnd(--main_idx); + role = inst->getOpndRoles(main_idx); + if (main_idx == 0) + state = 6; + return true; + + case 6: + return false; + } + while (opnd == 0 /*TBD: check roles here */); + + return true; +} + + //======================================================================================== // WebMaker implementation //======================================================================================== @@ -174,10 +310,6 @@ void WebMaker::runImpl() { -#ifdef _DEBUG__WEBMAKER - onConstruct(irm); -#endif - opandcount = irManager->getOpndCount(); nodecount = irManager->getFlowGraph()->getMaxNodeId(); @@ -192,36 +324,43 @@ phase2(); phase3(); phase4(); + + if (splitcount != 0 && getBoolArg("calc", false)) + calculateConstraints(); } count_splitted += splitcount; - DBGOUT("***splitcount=" << splitcount << "/" << count_splitted << endl;) + DBGOUT("splitcount=" << splitcount << "/" << count_splitted << endl;) +} -#ifdef _DEBUG__WEBMAKER - onDestruct(); -#endif + +void WebMaker::calculateConstraints () +{ + DBGOUT("Calculating constraints" << endl;) + + irManager->calculateLivenessInfo(); + ConstraintsResolverImpl impl(*irManager, true); + impl.run(); } void WebMaker::phase1() { - static CountTime phase1Timer("timer:ia32::webmaker:phase1"); - AutoTimer tm(phase1Timer); - opndxs.resize(opandcount); - for (size_t i = 0; i != opandcount; ++i) + for (unsigned i = 0; i != opandcount; ++i) { Opndx* opndxp = 0; - if (!irManager->getOpnd((uint32)i)->hasAssignedPhysicalLocation()) + if (!irManager->getOpnd(i)->hasAssignedPhysicalLocation()) opndxp = new Opndx(mm); opndxs[i] = opndxp; } - BitSet lives(mm, (uint32)opandcount); + BitSet lives(mm, opandcount); globcount = 0; const Nodes& postOrder = irManager->getFlowGraph()->getNodesPostOrder(); - for (Nodes::const_iterator it = postOrder.begin(), end = postOrder.end(); it!=end; ++it) { + for (Nodes::const_iterator it = postOrder.begin(), end = postOrder.end(); it!=end; ++it) + { Node* nodep = *it; if (nodep->isBlockNode()) @@ -229,14 +368,17 @@ for (Inst* instp = (Inst*)nodep->getFirstInst(); instp!=NULL; instp = instp->getNextInst()) { const uint32 iprops = instp->getProperties(); Inst::Opnds defs(instp, Inst::OpndRole_AllDefs); - size_t itx = 0; + unsigned itx = 0; for (Inst::Opnds::iterator it = defs.begin(); it != defs.end(); it = defs.next(it), ++itx) + //for (InstOpnds inops(instp, Inst::OpndRole_All, true); inops.hasMore(); inops.next()) { Opnd* opndp = instp->getOpnd(it); + //Opnd* opndp = inops.getOpnd(); Opndx* opndxp = opndxs.at(opndp->getId()); if (opndxp != 0) { - const uint32 oprole = const_cast(instp)->getOpndRoles((uint32)itx); + const uint32 oprole = const_cast(instp)->getOpndRoles(itx); + //const uint32 oprole = inops.getRole(); const bool isdef = ((oprole & Inst::OpndRole_UseDef) == Inst::OpndRole_Def) && ((iprops & Inst::Properties_Conditional) == 0 ); if (isdef) @@ -253,6 +395,8 @@ } } + // Mark all global definitions + irManager->getLiveAtExit(nodep, lives); BitSet::IterB bsk(lives); for (int i = bsk.getNext(); i != -1; i = bsk.getNext()) @@ -272,7 +416,7 @@ } } -#ifdef _DEBUG__WEBMAKER +#ifdef _DEBUG_WEBMAKER /* dbgout << "--- phase1 ---" << endl; for (size_t i = 0; i != opandcount; ++i) @@ -295,9 +439,6 @@ void WebMaker::phase2() { - static CountTime phase2Timer("timer:ia32::webmaker:phase2"); - AutoTimer tm(phase2Timer); - nodexs.resize(nodecount); for (size_t n = 0; n != nodecount; ++n) nodexs[n] = new Nodex(mm, globcount); @@ -324,9 +465,7 @@ opndxp->cbbp = 0; opndxp->copdefp = 0; } - - BitSet wrkbs(mm, (uint32)globcount); bool wrkbsvalid; size_t passnb = 0; @@ -391,7 +530,7 @@ } } -#ifdef _DEBUG__WEBMAKER +#ifdef _DEBUG_WEBMAKER /* dbgout << "--- total passes:" << passnb << endl; for (size_t n = 0; n != nodecount; ++n) @@ -409,50 +548,42 @@ void WebMaker::phase3() { - static CountTime phase3Timer("timer:ia32::webmaker:phase3"); - AutoTimer tm(phase3Timer); - - DBGOUT("--- phase3 ---" << endl;) - const Nodes& postOrder = irManager->getFlowGraph()->getNodesPostOrder(); - for (Nodes::const_iterator it = postOrder.begin(), end = postOrder.end(); it!=end; ++it) { + for (Nodes::const_iterator it = postOrder.begin(), end = postOrder.end(); it!=end; ++it) + { Node* nodep = *it; if (nodep->isBlockNode()) { const BitSet* globentryp = &nodexs.at(nodep->getId())->globentrys; - for (Inst* instp = (Inst*)nodep->getFirstInst(); instp!=NULL; instp = instp->getNextInst()) { + for (Inst* instp = (Inst*)nodep->getFirstInst(); instp != 0; instp = instp->getNextInst()) { const uint32 iprops = instp->getProperties(); - Inst::Opnds all(instp, Inst::OpndRole_All); - size_t itx = 0; - for (Inst::Opnds::iterator it = all.begin(); it != all.end(); it = all.next(it), ++itx) + + for (InstOpnds inops(instp, Inst::OpndRole_All, false); inops.hasMore(); inops.next()) { - Opnd* opndp = instp->getOpnd(it); + Opnd* opndp = inops.getOpnd(); Opndx* opndxp = opndxs.at(opndp->getId()); if (opndxp != 0) { OpDef* opdefp = 0; - const uint32 oprole = const_cast(instp)->getOpndRoles((uint32)itx); + const uint32 oprole = inops.getRole(); const bool isdef = ((oprole & Inst::OpndRole_UseDef) == Inst::OpndRole_Def) && ((iprops & Inst::Properties_Conditional) == 0 ); - DBGOUT(" O#" << opndp->getFirstId() << "(#" << opndp->getId() << ") def:" << isdef;) DBGOUT(" B#" << instp->getBasicBlock()->getId() << " " << *instp;) + DBGOUT(" " << *opndp << (isdef ? " DEF" : "") << endl;) if (isdef) {// opand definition here Opndx::OpDefs::iterator it = opndxp->opdefs.begin(), end = opndxp->opdefs.end(); - //if (opndxp->copdefp != 0) - // it = opndxp->copdefp; for (; it != end && it->defp != instp; ++it) ; assert(it != end); opdefp = &*it; - opndxp->copdefp = opdefp; + opndxp->copdefp = opdefp; opndxp->cbbp = nodep; - DBGOUT(" found1 globid#" << opdefp->globid << " def B#" << opdefp->defp->getBasicBlock()->getId() << " " << *opdefp->defp << endl;) } else {// opand usage here @@ -473,68 +604,59 @@ lastdefp = opdefp; } assert(lastdefp != 0); - DBGOUT(" found2 globid#" << opdefp->globid << " def B#" << opdefp->defp->getBasicBlock()->getId() << " " << *opdefp->defp << endl;) } else {// it can be usage of global or local definition opdefp = opndxp->copdefp; - DBGOUT(" found3 globid#" << opdefp->globid << " def B#" << opdefp->defp->getBasicBlock()->getId() << " " << *opdefp->defp << endl;) } } assert(opdefp != 0); if (opdefp->globid == -1) - { + {// local operand if (isdef) { ++opndxp->webscount; if (opndxp->webscount > 1) { opndxp->newopndp = splitOpnd(opndp); - DBGOUT("**new local web found O#" << opndxp->newopndp->getFirstId() << "(#" << opndxp->newopndp->getId() << ")" << endl;) + DBGOUT("**new local web found " << *opndp << " -> " << *opndxp->newopndp << endl;) } } if (opndxp->webscount > 1 && opdefp->defp->getNode() == nodep) { - instp->replaceOpnd(opndp, opndxp->newopndp); - DBGOUT(" opand replaced by O#" << opndxp->newopndp->getFirstId() << "(#" << opndxp->newopndp->getId() << ")" << endl;) + instp->replaceOpnd(opndp, opndxp->newopndp, isdef ? Inst::OpndRole_AllDefs : Inst::OpndRole_AllUses); + DBGOUT(" replace B#" << instp->getBasicBlock()->getId() << " " << *instp << endl;) } } else - { + {// global oprand if (opdefp->useps == 0) opdefp->useps = new OpDef::Instps(mm); - opdefp->useps->push_back(instp); + if (!isdef) + opdefp->useps->push_back(instp); } } } } } - } } } void WebMaker::phase4() { - static CountTime phase4Timer("timer:ia32::webmaker:phase4"); - AutoTimer tm(phase4Timer); - - DBGOUT("--- phase4 ---" << endl;) - Opndx* opndxp; - for (size_t i = 0; i != opandcount; ++i) + for (unsigned i = 0; i != opandcount; ++i) if ((opndxp = opndxs[i]) != 0 && !opndxp->opdefs.empty()) { - Opnd* opndp = irManager->getOpnd((uint32)i); + Opnd* opndp = irManager->getOpnd(i); Opndx::OpDefs& opdefs = opndxp->opdefs; - DBGOUT(" O#" << opndp->getFirstId() << "(#" << i << ")" << endl;) - for (size_t itx = 0; itx != opdefs.size(); ++itx) + for (unsigned itx = 0; itx != opdefs.size(); ++itx) { OpDef* opdefp = &opdefs[itx]; - DBGOUT(" " << itx << "->" << opdefp->linkx << " globid#" << opdefp->globid << endl;) if (opdefp->globid != -1 && !opdefp->visited) { Opnd* newopndp = 0; @@ -542,22 +664,23 @@ if (opndxp->webscount > 1) { newopndp = splitOpnd(opndp); - DBGOUT("**new global web found O#" << newopndp->getFirstId() << "(#" << newopndp->getId() << ")" << endl;) + DBGOUT("**new global web found " << *opndp << " -> " << *newopndp<< endl;) } while (!opdefp->visited) { - DBGOUT(" -visited globid#" << opdefp->globid << endl;) + if (newopndp != 0) + { + DBGOUT(" defp " << *opdefp->defp << endl;) + opdefp->defp->replaceOpnd(opndp, newopndp, Inst::OpndRole_AllDefs); + DBGOUT(" replace B#" << opdefp->defp->getBasicBlock()->getId() << " " << *opdefp->defp << endl;) - if (newopndp != 0 && opdefp->useps != 0) - { OpDef::Instps::iterator it = opdefp->useps->begin(), - end = opdefp->useps->end(); + end = opdefp->useps->end(); for (; it != end; ++it) { - (*it)->replaceOpnd(opndp, newopndp); - DBGOUT(" replace B#" << (*it)->getBasicBlock()->getId() << " " << *(*it);) - DBGOUT(" O#" << opndp->getFirstId() << "(#" << opndp->getId() << ")" << endl;) + (*it)->replaceOpnd(opndp, newopndp, Inst::OpndRole_AllUses); + DBGOUT(" replace B#" << (*it)->getBasicBlock()->getId() << " " << **it << endl;) } } @@ -581,19 +704,18 @@ break; } - size_t wx = lastdefp->linkx; + unsigned wx = lastdefp->linkx; lastdefp->linkx = opdefp->linkx; opdefp->linkx = wx; - DBGOUT("** glob def linked " << opdefp->globid << " + " << lastdefp->globid << endl;) } BitSet* WebMaker::bitsetp (BitSet*& bsp) { if (bsp == 0) - bsp = new BitSet(mm, (uint32)globcount); + bsp = new BitSet(mm, globcount); else - bsp->resize((uint32)globcount); + bsp->resize(globcount); return bsp; } @@ -601,46 +723,17 @@ Opnd* WebMaker::splitOpnd (const Opnd* op) { Opnd* np = irManager->newOpnd(op->getType(), op->getConstraint(Opnd::ConstraintKind_Initial)); - np->setCalculatedConstraint( op->getConstraint(Opnd::ConstraintKind_Calculated)); ++splitcount; return np; } //======================================================================================== -// Internal debug helpers -//======================================================================================== - - -#ifdef _DEBUG__WEBMAKER - -static void onConstruct (const IRManager& irm) -{ - MethodDesc& md = irm.getMethodDesc(); - const char * methodName = md.getName(); - const char * methodTypeName = md.getParentType()->getName(); - DBGOUT(endl << "Constructed " << methodTypeName << "." << methodName - << "(" << md.getSignatureString() << ")" << endl;) - -// sos = strcmp(methodTypeName, "spec/benchmarks/_213_javac/BatchEnvironment") == 0 && -// strcmp(methodName, "flushErrors") == 0; -} - - -static void onDestruct () -{ - DBGOUT(endl << "Destructed" << endl;) -} - -#endif //#ifdef _DEBUG__WEBMAKER - - -//======================================================================================== // Output formatters //======================================================================================== -#ifdef _DEBUG__WEBMAKER +#ifdef _DEBUG_WEBMAKER static ostream& operator << (ostream& os, Sep& x) { @@ -686,7 +779,7 @@ return os; } -#endif //#ifdef _DEBUG__WEBMAKER +#endif //#ifdef _DEBUG_WEBMAKER } //namespace Ia32 } //namespace Jitrino