Index: build/custom/msvc_2003/jitrino/jitrino.vcproj =================================================================== --- build/custom/msvc_2003/jitrino/jitrino.vcproj (revision 599381) +++ build/custom/msvc_2003/jitrino/jitrino.vcproj (working copy) @@ -766,6 +766,9 @@ RelativePath="..\..\..\..\vm\jitrino\src\codegenerator\ia32\Ia32RegAlloc3.cpp"> + + + + + + getOpndCount()) +{ + if (main_count == 0) + { + state = 6; + opnd = 0; + } + else if (forw) + { + main_idx = 0; + state = 0; + move(); + } + else + { + main_idx = main_count; + state = 3; + move(); + } +} + + +bool InstOpnds::move () +{ + opnd = 0; + + do + switch (state) + { + // forward iteration + + case 0: // main operands + opnd = inst->getOpnd(main_idx); + role = inst->getOpndRoles(main_idx); + if (++main_idx == main_count) + { + main_idx = 0; + state = 1; + } + return true; + + case 1: // find next memory operand + for (;; ++main_idx) + { + if (main_idx == main_count) + { + state = 6; + return false; + } + + main_opnd = inst->getOpnd(main_idx); + if (main_opnd->getMemOpndKind() != MemOpndKind_Null) + break; + } + + sub_idx = 0; + state = 2; + // fall to case 2 + + case 2: // sub operands + opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)sub_idx); + role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; + if (++sub_idx == 4) + { + ++main_idx; + state = 1; + } + break; + + // backward iteration + + case 3: // find prev memory operand + for (;;) + { + if (main_idx == 0) + { + main_idx = main_count; + state = 5; + goto S5; + } + + main_opnd = inst->getOpnd(--main_idx); + if (main_opnd->getMemOpndKind() != MemOpndKind_Null) + break; + } + + sub_idx = 4; + state = 4; + // fall to case 4 + + case 4: // sub operands + opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)--sub_idx); + role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; + if (sub_idx == 0) + state = 3; + break; + + case 5: // main operands +S5: opnd = inst->getOpnd(--main_idx); + role = inst->getOpndRoles(main_idx); + if (main_idx 
== 0) + state = 6; + return true; + + case 6: + return false; + } + while (opnd == 0 /*TBD: check roles here */); + + return true; +} + + +} //namespace Ia32 +} //namespace Jitrino Index: vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp (revision 599381) +++ vm/jitrino/src/codegenerator/ia32/Ia32SpillGen.cpp (working copy) @@ -1109,6 +1109,7 @@ assignMem(opline, opline.instx, opline.instx); opline.forw(); return true; +// printf("###Memory - opnd: %d, node: %d, execCount: %e\n", opline.opnd->getId(), bblock->getId(), bblock->getExecCount()); } @@ -1155,6 +1156,7 @@ opline.back(endx); opline.forw(); +// printf("###Evict - opnd: %d, node: %d, execCount: %e\n", opline.opnd->getId(), bblock->getId(), bblock->getExecCount()); return true; } @@ -1882,5 +1884,3 @@ } //namespace Jitrino - - Index: vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp (revision 599381) +++ vm/jitrino/src/codegenerator/ia32/Ia32ConstraintsResolver.cpp (working copy) @@ -26,66 +26,6 @@ { namespace Ia32{ -//======================================================================================== -// class Ia32ConstraintsResolver -//======================================================================================== -/** - * class Ia32ConstraintsResolver performs resolution of operand constraints - * and assigns calculated constraints (Opnd::ConstraintKind_Calculated) to operands. - * The resulting calculated constraints of operands determine allowable physical location - * for the operand. 
- * - * This transformer allows to insert operands into instructions before it - * regardless instruction constraints except that Initial constraints of explicit - * instruction operands must have non-null intersections with corresponding constraints - * of at least one opcode group of the instruction. - * - * ConstraintResolver analyzes instruction constraints and splits operands when necessary. - * - * This transformer ensures that - * 1) All instruction constraints for EntryPoints, CALLs and RETs - * are set appropriately (IRManager::applyCallingConventions()) - * 2) All operands has non-null calculated constraints - * 3) All operands fits into instructions they are used in (in terms of instruction constraints) - * For example: - * Original code piece: - * I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) - * I32: MULSD .s65.:double,.t2:double - * I33: RET t66(0):int16 (AU:s65:double) - * - * RET imposes constraint on s65 requiring to place it into FP0 register - * (its FP0D alias in this particular case) - * MULSD imposes constraint on s65 requiring to place it into XMM register - * - * After the pass: - * I38: (AD:s65:double) =CopyPseudoInst (AU:t1:double) - * I32: MULSD .s65.:double,.t2:double - * I46: (AD:t75:double) =CopyPseudoInst (AU:s65:double) - * I33: RET t66(20):int16 (AU:t75:double) - * - * Thus, ConstraintResolver inserted I46 splitting s65 to s65 and t75 - * s65 is assigned with Mem|XMM calculated constraint and t75 - * is assigned with FP0D calculated calculated constraint - * - * 4) If the live range of an operand crosses a call site and the operand is not redefined - * in the call site, the calculated constraint of the operand is narrowed the callee-save regs - * or memory (stack) - * - * 5) If the operand (referred as original operand here) is live at entry of a catch handler - * then necessary operand splitting is performed as close as possible to the instruction - * which caused the splitting and original operand is used before and after 
the instruction. - * - * The main principle of the algorithm is anding of instruction constraints into - * operand calculated constraints and splitting operands to ensure that the calculated constraint - * is not null - * - * This transformer must be inserted before register allocator which relies on - * calculated operand constraints. - * - * The implementation of this transformer is located in the ConstraintResolverImpl class. - * - */ - static const char* help = " The 'constraints' action accepts 3 sets of 3 parameters which \n" " define profile-guided operand splitting for operand\n" @@ -119,97 +59,75 @@ " Default value is 0 ('never').\n" ; -class ConstraintsResolver : public SessionAction { - /** runImpl is required override, calls ConstraintsResolverImpl.runImpl */ - void runImpl(); - /** This transformer requires up-to-date liveness info */ - uint32 getNeedInfo()const{ return NeedInfo_LivenessInfo; } - uint32 getSideEffects()const{ return 0; } - -}; - static ActionFactory _constraints("constraints", help); //======================================================================================== -// class ConstraintsResolverImpl +// class ConstraintsResolver //======================================================================================== -/** - * class Ia32ConstraintsResolverImpl is an implementation of simple constraint resolution algorithm - * The algorithm takes one-pass over CFG. - * - * The algorithm works as follows: - * - * 1) Creates an array of basic blocks and orders by bb->getExecCount() - * in createBasicBlockArray(). 
- * Thus, the algorithm handles hottest basic blocks first and constraints are assigned to operands first - * from the most frequently used instructions - * - * 2) Collects a bit vector of all operands live at entries of all dispatch node entries - * in calculateLiveAtDispatchBlockEntries() - * - * 3) For all operands: - * - If an operand has already been assigned to some location - * (its location constraint is not null) the calculated constraint is set to - * the location constraint - * - * - If an operand is live at entry of a dispatch node - * the calculated constraint is set to the constraint - * preserving operand values during exception throwing - * This constraint is returned by getDispatchEntryConstraint - * In fact this is the constriant for the DRL calling convention - * - * This is done in calculateStartupOpndConstraints() - * Originally all calculateed constraints are equial to Initial constraints - * - * 4) Walks through all basic blocks collected and arranged at step 1 - * in resolveConstraints() - * - * The opndReplaceWorkset array of operand replacements is maintained - * (indexed by from-operand id). 
- * - * This is the array of current replacement for operands - * and is reset for each basic block (local within basic blocks) - * - * This array is filled as a result of operand splitting and indicates - * which operand must be used instead of original ones for all the instructions - * above the one caused splitting - * - * 4.1) Walks throw all instruction of a basic block in backward order - * in resolveConstraints(BasicBlock * bb) - * 4.1.1) resolves constraints for each instruction - * in resolveConstraints(Inst * inst); - * - * To do this already collected calculated constraint of - * either original operand or its current replacement is anded - * with instruction constraint for this operand occurence and - * if the result is null, new operand is created and substituted instead - * - * 4.1.1.1) All def operands of the isntruction are traversed - * and operand splitting is performed after the instruction (when necessary) - * def&use cases are also handled during this step - * 4.1.1.2) If the instruction is CALL, all hovering operands of - * the isntruction are traversed. - * - * Hovering operands are operands which are live across a call site and are not - * redefined in the call site - * This step ensures operands are saved in callee-save regs or memory - * and takes into account whether an operand is live at dispatch node entries - * - * Operand splitting is performed before the instruction (when necessary) - * 4.1.1.3) All use operands of the instruction are traversed - * and operand splitting is performed before the instruction (when necessary) - * - * The current implementation doesn't deal properly with conditional memory constraints. - * I.e. it doesn't resolve properly things like ADD m, m when both operands are already - * assigned. 
- * - * For more details please refer to ConstraintsResolverImpl source code - */ +//static bool sos = false; +// +//static std::ostream& operator << (std::ostream& os, Constraint& c) +//{ +// os << ""; +// return os; +//} //_________________________________________________________________________________________________ -Constraint ConstraintsResolverImpl::getCalleeSaveConstraint(Inst * inst, Opnd * opnd) +void ConstraintsResolver::runImpl() { + getArg("callSplitThresholdForNoRegs", callSplitThresholdForNoRegs); + getArg("callSplitThresholdFor1Reg", callSplitThresholdFor1Reg); + getArg("callSplitThresholdFor4Regs", callSplitThresholdFor4Regs); + + getArg("defSplitThresholdForNoRegs", defSplitThresholdForNoRegs); + getArg("defSplitThresholdFor1Reg", defSplitThresholdFor1Reg); + getArg("defSplitThresholdFor4Regs", defSplitThresholdFor4Regs); + + getArg("useSplitThresholdForNoRegs", useSplitThresholdForNoRegs); + getArg("useSplitThresholdFor1Reg", useSplitThresholdFor1Reg); + getArg("useSplitThresholdFor4Regs", useSplitThresholdFor4Regs); + +// MethodDesc* md = getIRManager().getCompilationInterface().getMethodToCompile(); +// const char* mtype = md->getParentType()->getName(), +// * mname = md->getName(), +// * msig = md->getSignatureString(); + +// log(LogStream::CT) << "***CONSTRAINTS for " << mtype << "|" << mname << "|" << msig << std::endl; + +// sos = strcmp(mtype, "java/util/HashMap") == 0 +// && strcmp(mname, "putImpl") == 0 +// && strcmp(msig, "(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;") == 0; + +// if (sos) +// { +// log(LogStream::CT) << "*** thresholds: " +// << callSplitThresholdForNoRegs << ", " +// << callSplitThresholdFor1Reg << ", " +// << callSplitThresholdFor4Regs +// << std::endl; +// } + + process(&getIRManager()); +} + +//_________________________________________________________________________________________________ +void ConstraintsResolver::initz() +{ + callSplitThresholdForNoRegs = 0, + callSplitThresholdFor1Reg = 0, + 
callSplitThresholdFor4Regs = 0, + defSplitThresholdForNoRegs = 0, + defSplitThresholdFor1Reg = 0, + defSplitThresholdFor4Regs = 0, + useSplitThresholdForNoRegs = 0, + useSplitThresholdFor1Reg = 0, + useSplitThresholdFor4Regs = 0; +} + +//_________________________________________________________________________________________________ +Constraint ConstraintsResolver::getCalleeSaveConstraint(Inst * inst, Opnd * opnd) +{ // This implementation don't take into account operand types // and provides only GP call-safe regs (thus only memory for non-integer and non-pointer types) assert(inst->getKind()==Inst::Kind_CallInst); @@ -218,21 +136,26 @@ } //_________________________________________________________________________________________________ -Constraint ConstraintsResolverImpl::getDispatchEntryConstraint(Opnd * opnd) +Constraint ConstraintsResolver::getDispatchEntryConstraint(Opnd * opnd) { // Currently the same result as from getCalleeSaveConstraint - Constraint c=(Constraint(OpndKind_Memory)|STACK_REG|Constraint(irManager.getEntryPointInst()->getCallingConventionClient().getCallingConvention()->getCalleeSavedRegs(OpndKind_GPReg))) & opnd->getConstraint(Opnd::ConstraintKind_Initial); + Constraint c=(Constraint(OpndKind_Memory)|STACK_REG|Constraint(irManager->getEntryPointInst()->getCallingConventionClient().getCallingConvention()->getCalleeSavedRegs(OpndKind_GPReg))) & opnd->getConstraint(Opnd::ConstraintKind_Initial); return c.isNull()?Constraint(OpndKind_Memory, opnd->getSize()):c; } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::run() +void ConstraintsResolver::process(IRManager* irm) { + irManager = irm; + + const uint32 was_opnds = irManager->getOpndCount(), + was_insts = irManager->getMaxInstId(); + // Set all instruction constraints for EntryPoints, CALLs and RETs if (!second) - irManager.applyCallingConventions(); + irManager->applyCallingConventions(); // Initialization - 
originalOpndCount=irManager.getOpndCount(); + originalOpndCount=irManager->getOpndCount(); liveOpnds.resizeClear(originalOpndCount); needsOriginalOpnd.resizeClear(originalOpndCount); liveAtDispatchBlockEntry.resizeClear(originalOpndCount); @@ -256,28 +179,34 @@ // Resolve constraints resolveConstraints(); // This is a local transformation, resize liveness vectors - irManager.fixLivenessInfo(); + irManager->fixLivenessInfo(); + + const uint32 opnds = irManager->getOpndCount() - was_opnds, + insts = irManager->getMaxInstId() - was_insts; + if (opnds != 0 || insts != 0) + Log::log_ct() << "ConstraintResolver: opands added:" << opnds + << " instructions added:" << insts << std::endl; } //_________________________________________________________________________________________________ -double ConstraintsResolverImpl::getBasicBlockPriority(Node * node) +double ConstraintsResolver::getBasicBlockPriority(Node * node) { // Use simple heuristics to handle prologs and epilogs after all other nodes. // This improves performance as prologs and epilogs usually set bad constraints // to operands (entry points, rets) return - irManager.getFlowGraph()->getEntryNode() == node ? (double)0 : - irManager.isEpilog(node) ? (double)1 : + irManager->getFlowGraph()->getEntryNode() == node ? (double)0 : + irManager->isEpilog(node) ? 
(double)1 : 10 + node->getExecCount(); } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::createBasicBlockArray() +void ConstraintsResolver::createBasicBlockArray() { // Filling of basicBlock, simple insertion-based ordering of basic blocks - const Nodes& nodes = irManager.getFlowGraph()->getNodes(); + const Nodes& nodes = irManager->getFlowGraph()->getNodes(); for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) { Node* node = *it; if (node->isBlockNode()){ @@ -293,25 +222,25 @@ } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::calculateLiveAtDispatchBlockEntries() +void ConstraintsResolver::calculateLiveAtDispatchBlockEntries() { - const Nodes& nodes = irManager.getFlowGraph()->getNodes(); + const Nodes& nodes = irManager->getFlowGraph()->getNodes(); for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) { Node *node = *it; if (node->isDispatchNode()) { - liveAtDispatchBlockEntry.unionWith(*irManager.getLiveAtEntry(node)); + liveAtDispatchBlockEntry.unionWith(*irManager->getLiveAtEntry(node)); } } } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::calculateStartupOpndConstraints() +void ConstraintsResolver::calculateStartupOpndConstraints() { // Reset calculated constraints to null constraints - irManager.resetOpndConstraints(); + irManager->resetOpndConstraints(); // For all operands in the CFG for (uint32 i=0; igetOpnd(i); Constraint c=opnd->getConstraint(Opnd::ConstraintKind_Initial); Constraint cl=opnd->getConstraint(Opnd::ConstraintKind_Location); @@ -333,7 +262,7 @@ } //_________________________________________________________________________________________________ -bool ConstraintsResolverImpl::constraintIsWorse(Constraint cnew, Constraint cold, unsigned 
normedBBExecCount, +bool ConstraintsResolver::constraintIsWorse(Constraint cnew, Constraint cold, unsigned normedBBExecCount, unsigned splitThresholdForNoRegs, unsigned splitThresholdFor1Reg, unsigned splitThresholdFor4Regs ) { @@ -351,13 +280,15 @@ } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::resolveConstraintsWithOG(Inst * inst) +void ConstraintsResolver::resolveConstraintsWithOG(Inst * inst) { +//log(LogStream::CT) << " I#" << inst->getId() << std::endl; + // Initialize hoveringOpnds with operands live after the call if the inst is CALL if (inst->getMnemonic()==Mnemonic_CALL) hoveringOpnds.copyFrom(liveOpnds); - double dblExecCount = 1000. * inst->getBasicBlock()->getExecCount() / irManager.getFlowGraph()->getEntryNode()->getExecCount(); + double dblExecCount = 1000. * inst->getBasicBlock()->getExecCount() / irManager->getFlowGraph()->getEntryNode()->getExecCount(); if (dblExecCount > 100000000.) dblExecCount = 100000000.; unsigned execCount = (unsigned)dblExecCount; @@ -392,10 +323,11 @@ currentOpnd->setCalculatedConstraint(cr); }else{ // cannot substitute currentReplacementOpnd into this position, needs splitting - opndToSet=irManager.newOpnd( originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize()) ); - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, currentOpnd, opndToSet); + opndToSet=irManager->newOpnd( originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize()) ); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, currentOpnd, opndToSet); // split after the defining instruction copySequence->insertAfter(inst); +//log(LogStream::CT) << " split def O#" << opndToSet->getFirstId() << std::endl; if (inst->getOpndRoles(it)&Inst::OpndRole_Use){ // This is def&use case (like add t0, t1 for t0) if (!needsOriginalOpnd.getBit(originalOpnd->getId())){ @@ -404,7 +336,7 @@ }else{ // use the original operand for all the instructions 
above assert(currentOpnd==originalOpnd); - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, opndToSet, originalOpnd); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, opndToSet, originalOpnd); // split above the instruction copySequence->insertBefore(inst); opndUsage[originalOpnd->getId()]+=execCount; @@ -428,7 +360,7 @@ // for all operands BitSet::IterB ib(hoveringOpnds); for (int i = ib.getNext(); i != -1; i = ib.getNext()){ - Opnd * originalOpnd=irManager.getOpnd(i); + Opnd * originalOpnd=irManager->getOpnd(i); assert(originalOpnd->getId()getId())||opndReplaceWorkset[originalOpnd->getId()]==NULL); // currentOpnd is either the current replacement or the original operand @@ -457,12 +389,19 @@ // Try to use originalOpnd over this instruction and for the instructions above Constraint co=originalOpnd->getConstraint(Opnd::ConstraintKind_Calculated); Constraint cr=co & ci; + +//log(LogStream::CT).out() +// << " O#" << originalOpnd->getFirstId() +// << " cr:" << cr << " cc:" << cc +// << std::dec << std::endl; + if (!constraintIsWorse(cr, cc, execCount, callSplitThresholdForNoRegs, callSplitThresholdFor1Reg, callSplitThresholdFor4Regs)){ opndToSet=originalOpnd; opndToSet->setCalculatedConstraint(cr); }else{ // cannot use original, create a new one - opndToSet=irManager.newOpnd(originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize())); + opndToSet=irManager->newOpnd(originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize())); +//log(LogStream::CT) << " split call O#" << opndToSet->getFirstId() << std::endl; } } } @@ -472,7 +411,7 @@ // an operand different to the current replacement // is required to be over this call site, append splitting below the call site // this is like restoring from a call-safe location under a call - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, currentOpnd, opndToSet); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, currentOpnd, opndToSet); 
copySequence->insertAfter(inst); } if (!needsOriginalOpnd.getBit(originalOpnd->getId())) @@ -481,7 +420,7 @@ // add splitting above // this is like saving into a call-safe location above a call assert(currentOpnd==originalOpnd); - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, opndToSet, originalOpnd); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, opndToSet, originalOpnd); copySequence->insertBefore(inst); opndUsage[originalOpnd->getId()]+=execCount; } @@ -520,9 +459,10 @@ // cannot substitute currentReplacementOpnd into this position, needs splitting // split above the inst, force to insert the new operand into the inst, and use // currentOpnd above - opndToSet=irManager.newOpnd(originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize())); - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, opndToSet, currentOpnd); + opndToSet=irManager->newOpnd(originalOpnd->getType(), ci | Constraint(OpndKind_Mem, ci.getSize())); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, opndToSet, currentOpnd); copySequence->insertBefore(inst); +//log(LogStream::CT) << " split use O#" << opndToSet->getFirstId() << std::endl; } // update liveness (for def/use case if (inst->isLiveRangeStart(it)) @@ -535,11 +475,14 @@ } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::resolveConstraints(Node * bb) +void ConstraintsResolver::resolveConstraints(Node * bb) { +//if (sos) +//log(LogStream::CT) << " N#" << bb->getId() << std::endl; + assert(bb->isBlockNode()); // scan all insts of bb in reverse order - irManager.getLiveAtExit(bb, liveOpnds); + irManager->getLiveAtExit(bb, liveOpnds); for (Inst * inst=(Inst*)bb->getLastInst(), * prevInst=NULL; inst!=NULL; inst=prevInst){ prevInst=inst->getPrevInst(); resolveConstraintsWithOG(inst); @@ -548,16 +491,16 @@ // if we come to bb entry with some replacement for an operand and the operand is live at the 
entry // insert copying from the original operand to the replacement operand uint32 execCount = (uint32)bb->getExecCount(); - BitSet * ls = irManager.getLiveAtEntry(bb); + BitSet * ls = irManager->getLiveAtEntry(bb); BitSet::IterB ib(*ls); for (int i = ib.getNext(); i != -1; i = ib.getNext()){ - Opnd * originalOpnd = irManager.getOpnd(i); + Opnd * originalOpnd = irManager->getOpnd(i); assert(originalOpnd->getId()getId()]; if (currentOpnd!=NULL){ if (currentOpnd!=originalOpnd){ -// assert(irManager.getLiveAtEntry(bb)->isLive(originalOpnd)); - Inst * copySequence=irManager.newCopyPseudoInst(Mnemonic_MOV, currentOpnd, originalOpnd); +// assert(irManager->getLiveAtEntry(bb)->isLive(originalOpnd)); + Inst * copySequence=irManager->newCopyPseudoInst(Mnemonic_MOV, currentOpnd, originalOpnd); bb->prependInst(copySequence); opndUsage[originalOpnd->getId()]+=execCount; } @@ -567,7 +510,7 @@ } //_________________________________________________________________________________________________ -void ConstraintsResolverImpl::resolveConstraints() +void ConstraintsResolver::resolveConstraints() { // for all basic blocks in the array for (uint32 ibb=0, nbb=(uint32)basicBlocks.size(); ibbgetOpndCount()) -{ - if (main_count == 0) - { - state = 6; - opnd = 0; - } - else if (forw) - { - main_idx = 0; - state = 0; - move(); - } - else - { - main_idx = main_count; - state = 3; - move(); - } -} - - -bool InstOpnds::move () -{ - opnd = 0; - - do - switch (state) - { - // forward iteration - - case 0: // main operands - opnd = inst->getOpnd(main_idx); - role = inst->getOpndRoles(main_idx); - if (++main_idx == main_count) - { - main_idx = 0; - state = 1; - } - return true; - - case 1: // find next memory operand - for (;; ++main_idx) - { - if (main_idx == main_count) - { - state = 6; - return false; - } - - main_opnd = inst->getOpnd(main_idx); - if (main_opnd->getMemOpndKind() != MemOpndKind_Null) - break; - } - - sub_idx = 0; - state = 2; - // fall to case 2 - - case 2: // sub operands - 
opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)sub_idx); - role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; - if (++sub_idx == 4) - { - ++main_idx; - state = 1; - } - break; - - // backward iteration - - case 3: // find prev memory operand - for (;;) - { - if (main_idx == 0) - { - main_idx = main_count; - state = 5; - goto S5; - } - - main_opnd = inst->getOpnd(--main_idx); - if (main_opnd->getMemOpndKind() != MemOpndKind_Null) - break; - } - - sub_idx = 4; - state = 4; - // fall to case 4 - - case 4: // sub operands - opnd = main_opnd->getMemOpndSubOpnd((MemOpndSubOpndKind)--sub_idx); - role = Inst::OpndRole_OpndLevel | Inst::OpndRole_Use; - if (sub_idx == 0) - state = 3; - break; - - case 5: // main operands -S5: opnd = inst->getOpnd(--main_idx); - role = inst->getOpndRoles(main_idx); - if (main_idx == 0) - state = 6; - return true; - - case 6: - return false; - } - while (opnd == 0 /*TBD: check roles here */); - - return true; -} - - -//======================================================================================== // WebMaker implementation //======================================================================================== @@ -339,8 +193,9 @@ DBGOUT("Calculating constraints" << endl;) irManager->calculateLivenessInfo(); - ConstraintsResolverImpl impl(*irManager, true); - impl.run(); + ConstraintsResolver cr(true); + cr.initz(); + cr.process(irManager); } Index: vm/jitrino/src/codegenerator/ia32/InstOpnds.h =================================================================== --- vm/jitrino/src/codegenerator/ia32/InstOpnds.h (revision 0) +++ vm/jitrino/src/codegenerator/ia32/InstOpnds.h (revision 0) @@ -0,0 +1,42 @@ + + +#include "Ia32IRManager.h" + + +namespace Jitrino +{ +namespace Ia32 +{ + + +class InstOpnds +{ +public: + + InstOpnds (const Inst* inst, uint32 roles = Inst::OpndRole_All, bool forw = true); + + bool hasMore () const {return opnd != 0;} + void next () {move();} + Opnd* getOpnd () const {return opnd;} + uint32 getRole 
() const {return role;} + +protected: + + bool move (); + + const Inst* inst; + const uint32 roles; + const unsigned main_count; + + unsigned state, + main_idx, + sub_idx; + + uint32 role; + Opnd* opnd; + Opnd* main_opnd; +}; + + +} //namespace Ia32 +} //namespace Jitrino Index: vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp (revision 599381) +++ vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc3.cpp (working copy) @@ -527,6 +527,11 @@ void RegAlloc3::runImpl () { +#ifdef _TIMER + XTimer timer; + timer.start(); +#endif + getIRManager().fixEdgeProfile(); registers.parse(getArg("regs")); @@ -562,6 +567,19 @@ count_coalesced += coalesceCount; SpillGen(); + +#ifdef _TIMER + timer.stop(); + if (isLogEnabled(LogStream::DBG)) + { + MethodDesc& md = getIRManager().getMethodDesc(); + log(LogStream::DBG).out() + << timer.getSeconds() + << " " << getIRManager().getOpndCount() + << " " << md.getParentType()->getName() << "." << md.getName() << md.getSignatureString() + << endl; + } +#endif } @@ -635,6 +653,9 @@ Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default); if (loc.isNull()) {// this operand is not allocated yet + if (opnd->isPlacedIn(OpndKind_Imm)) { + printf("Operand %d is immediate", opnd->getId()); + } loc = opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default); if ((ridx = registers.index(loc)) != -1) {// operand should be assigned to register Index: vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc4.cpp =================================================================== --- vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc4.cpp (revision 0) +++ vm/jitrino/src/codegenerator/ia32/Ia32RegAlloc4.cpp (revision 0) @@ -0,0 +1,2987 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Intel, Sergey L. Ivashin + */ + +#include "Ia32IRManager.h" +#include "InstOpnds.h" +#include "Ia32ConstraintsResolver.h" +#include "Dominator.h" +#include "Ia32RegAllocCheck.h" +#include "Stl.h" +#include "Log.h" +#include "Ia32Printer.h" +#include +#include +#include +#include +#include +#include + +#ifdef _DEBUG_REGALLOC4 +#include "IOp.h" +#include "Ia32IRXMLDump.h" +#endif + +#ifdef _DEBUG_REGALLOC4 +#ifdef _MSC_VER +#pragma warning(disable : 4505) //unreferenced local function has been removed +#endif //#ifdef _MSC_VER +#endif //#ifdef _DEBUG_REGALLOC4 + + +using namespace std; + +//#ifdef _EM64T_ +//#define _SKIP_CATCHED +//#endif + +namespace Jitrino +{ + +namespace Ia32 +{ + + +//======================================================================================== +// class Ia32RegAlloc4 +//======================================================================================== + +/** + * This class attempts to assign register for any operand (found in LIR) that can be + * allocated in register. + * + * Set of registers available for allocation is specified by input arguments and saved + * in 'constrs' class member. All operands that cannot be allocated in the registers + * available are simply ignored. 
+ * + * So this allocator should be called for each set of the registers available (GPReg, XMM, + * FP) independently. + * + * It is not guaranteed that all operands which can be assigned will be assigned. + * Therefore, the companion class (SpillGen) must be used after this allocator. + * + */ + +struct RegAlloc4 : public SessionAction +{ + MemoryManager mm; // this is private MemoryManager, not irm.getMemoryManager() + + unsigned flag_SORT; + bool flag_NEIGBH; + bool flag_COALESCE; + + unsigned coalesceCount; + + class BoolMatrix; + typedef uint32 RegMask; // used to represent set of registers + + static void merge (Constraint& c, RegMask mk) {c.setMask(c.getMask() | mk);} + +// Table of all available registers sets. +// Each set (GPReg, FPReg, XMMReg and so on) is represented by the corresponding +// Constraint object. + struct Registers : public StlVector + { + Registers (MemoryManager& mm) :StlVector(mm) {} + + void parse (const char*); + + // register the new constraint (register) in the table. + // if table doesn't contain constraint of the specified kind, it will be ignored + // (add = false) or new table entry for the constraint will be created (add = true). 
+ int merge (const Constraint&, bool add = false); + + // returns table index of the constraint of the specified kind or -1 + int index (const Constraint&) const; + + int indexes[IRMaxRegKinds]; + }; + Registers registers; + + + enum DefUse_Flags + { + Split = 0x100, + Fork = 0x200, + Join = 0x400 + }; + + struct DefUse + { + Inst* inst; + uint32 role; + double cost; + + bool operator == (Inst* inst) const {return this->inst == inst;} + }; + + typedef StlList DefUses; + + struct Opndx; // forward declaration + + + + typedef StlList RegMasksList; + struct RegPress + { + RegMasksList regMasksList; + int elementsNum; + + RegPress (MemoryManager& mm); + void add (RegMask); + void remove (RegMask); + boolean isConsist(); + }; + + + typedef StlList Indexes; //ToBeRemoved (change to OpndxList) + typedef StlList OpndxList; + + struct Saturation + { + Saturation (MemoryManager& mm) : lives(mm) {} + + Inst* inst; + int regsavail, + idx; + OpndxList lives; + }; + typedef StlList SaturationList; + + SaturationList saturations; + + // For efficiency, empty stack is implemented without allocating an empty vector + struct OpndxStack + { + OpndxStack (MemoryManager& m) : mm(m), stack(0) {} + + void push (Opndx*); + void pop (); + Opndx* top () const; + bool empty () const; + + MemoryManager& mm; + StlVector* stack; + }; + + struct Opndx + { + Opndx (MemoryManager& mm) : opnd(0) + , adjacents(mm), hiddens(mm), neighbs(mm), defuses(mm) + , splits(0), firstbb(0), opstack(mm), saturations(0) + , alloc(0), spill(false) + {} + + bool isCandidate () const {return alloc == 0;} + void note (Inst*, uint32 role, DefUse* = 0); + int significance () const {return (saturations == 0) ? 
0 : saturations->size();} + bool active () const; + void computeCost (); + void summCost (); + + int idx; + Opnd* opnd; + + Indexes adjacents, // change to OpndxList* + hiddens; // change to OpndxList* + Indexes neighbs; // change to OpndxList* + + DefUses defuses; + + OpndxList* splits; + Node* firstbb; // temporary: after splitting points to first block where the operand defined + OpndxStack opstack; + + SaturationList* saturations; + + int ridx; // index in Registers of register assigned/will be assigned + RegMask alloc, // 0 or mask of the register assigned + avail; // if not assigned, then mask of the registers available (defined by calculated constraint) + unsigned nbavails; // number of the registers available for this operand ( =bitCount(avail) ) + double cost; + bool spill; // operand selected for spilling + }; + + +// Operand's graph to be colored + class Graph + { + public: + + Graph (MemoryManager& mm) : opndxs(mm), opndmap(mm) {} + + bool empty () const {return opndxs.empty();} + unsigned size () const {return opndxs.size();} + void clear () {opndxs.clear(); opndmap.clear();} + void reserve (unsigned n) {opndxs.reserve(n);} + void remap (); // builds mapping between IRManager operandx index and graph index + +// The following methods use graph's index + + int add (Opndx*); + Opndx*& at (int x) {return opndxs.at((unsigned)x);} + const Opndx* at (int x) const {return opndxs.at((unsigned)x);} + Opndx*& operator [] (unsigned x) {return opndxs.operator[](x);} + const Opndx* operator [] (unsigned x) const {return opndxs.operator[](x);} + + void connect (int x1, int x2); + int disconnect (int x); + void reconnect (int x); + void moveNodes (Indexes& from, Indexes& to, int x) const; + +// The following methods use IRManager's operand index as input (Opnd::getId()) + + int getIdx (int i) const {return (unsigned)i < opndmap.size() ? 
opndmap[i] : -1;} + void setIdx (int i, int x) {assert((unsigned)i < opndmap.size()); opndmap[i] = x;} + Opndx* getOpndx (int i) {int x = getIdx(i); return x == -1 ? 0 : opndxs.at(x);} + + protected: + + StlVector opndxs; + StlVector opndmap; // mapping opnd.opid -> graph index or -1 + }; + + Graph graph; + + unsigned graphsize, // total size of graph (operands + registers) + xregbase; // index of first register in graph + + StlVector nstack; + struct sortRule1; + +#ifdef _DEBUG_REGALLOC4 + IOp* iop; +#endif + + + RegAlloc4 () : mm("RegAlloc4") + , registers(mm), saturations(mm) + , graph(mm), nstack(mm) + {} + + uint32 getNeedInfo () const {return NeedInfo_LivenessInfo;} + uint32 getSideEffects () const {return coalesceCount == 0 ? 0 : SideEffect_InvalidatesLivenessInfo;} + + void runImpl(); + void SpillGen (); + bool verify (bool force=false); + + bool buildGraph (); + bool isForkNode (Node * node); + void splitOpnds (); + Opndx* splitOpnd (Opndx*); + void domtreeWalk (DominatorNode*, int depth); + void normalize (); + void setupSaturations (); + void checkSaturations (); + void mergeOpndxs (); + bool shouldMerge (Opndx*, Opndx*, DefUse*) const; + void merge (Opndx*, Opndx*); + void spillOpndxs (); + void spillOpndx (Opndx*, OpndxList&); + void cleanupSplits (); + void setupConstraints (); + void processNodes (BoolMatrix&); + void processInst (Inst*, BitSet&, BoolMatrix& /*, double excount*/); + void showGraph (const char*); + void showOpands (const char*, bool du = true); + void lookLives (Opnd*, BitSet&, BoolMatrix&); + int findNode (Opnd*) const; + bool coalescing (BoolMatrix& matrix); + void coalesce (BoolMatrix& matrix, Opndx*, Opndx*); + int duplicates (Indexes& list, BoolMatrix& matrix, int x0, int x1); + void pruneGraph (); + bool shouldPrune (const Opndx*) const; + bool assignRegs (); + bool assignReg (Opndx*); + void spillRegs (); + int spillReg (Opndx*); + int update (const Inst*, const Opnd*, Constraint&) const; + Node* insertNode (Edge*); + Inst* 
insertCopy (bool atentry, Node*, Opnd*); +}; + + +static ActionFactory _pg_regalloc("pg_regalloc"); + + +static Counter count_spilled("ia32:regalloc4:spilled", 0), + count_assigned("ia32:regalloc4:assigned", 0), + count_coalesced("ia32:regalloc4:coalesced", 0); + + +//======================================================================================== +// Internal debug helpers +//======================================================================================== + + +using std::endl; +using std::ostream; + +#ifdef _DEBUG_REGALLOC4 + +struct Sep +{ + Sep () :first(true) {} + + bool first; +}; + +static ostream& operator << (ostream&, Sep&); + +static ostream& operator << (ostream&, const Inst&); + +static ostream& operator << (ostream&, const Opnd&); + +static ostream& operator << (ostream&, Constraint); + +static ostream& operator << (ostream&, const RegAlloc4::Registers&); + +struct RegMasks +{ + RegMasks (Constraint x, RegAlloc4::RegMask mk) : c(x) {c.setMask(mk);} + + Constraint c; +}; + +static ostream& operator << (ostream&, RegMasks); + +static ostream& outRegMasks (ostream&, RegAlloc4::RegMask*, const RegAlloc4::Registers&); + +static ostream& operator << (ostream&, const RegAlloc4::DefUse&); + +static ostream& operator << (ostream&, const RegAlloc4::Opndx&); + +static ostream& operator << (ostream&, const RegAlloc4::Graph&); + +#define DBGOUT(s) log(LogStream::DBG).out() << s + +#else + +#define DBGOUT(s) + +#endif + + +//======================================================================================== +// Utility +//======================================================================================== + + +static int instCount (const IRManager& irm) +{ + int count = 0; + + const Nodes& nodes = irm.getFlowGraph()->getNodesPostOrder(); + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it != end; ++it) + { + Node* node = *it; + if (node->isBlockNode()) + for (Inst* inst = (Inst*)node->getLastInst(); inst != 0; inst = 
/**
 * Tokens - utility class for parsing (zero-terminated) strings.
 *
 * Only the scanner state and trivial accessors live here; scan() is declared but
 * its definition is not part of this section.
 *
 *   src   - string being parsed (set by the constructor or by init())
 *   buff  - buffer returned by lex()
 *   isw   - flag returned by isWord()
 *   dst   - NOTE(review): presumably the write cursor inside 'buff' used by scan() - confirm
 */
class Tokens
{
public:

    // Every member is initialized, so lex() yields an empty string and isWord()
    // yields false until the first scan() (previously they read indeterminate data).
    Tokens (const char* s) : src(s), dst(buff), isw(false) {buff[0] = 0;}

    // Restart parsing on another string; the current lexeme is left untouched
    void init (const char* s)       {src = s;}
    bool scan ();
    bool isWord () const            {return isw;}
    const char* lex () const        {return buff;}

protected:

    const char* src;
    char*       dst;
    char        buff[64];
    bool        isw;
};
j*(j-1)/2 + i + : i*(i-1)/2 + j; + + msk = (char)(1 << (bitn & 7)); + ptr = base + (bitn >> 3); + } + + size_t dim, dims; + char* base; + + char msk; + char* ptr; +}; + + +RegAlloc4::BoolMatrix::BoolMatrix (MemoryManager& mm, size_t d) +{ + assert(d > 0); + dim = d; + dims = (dim*(dim - 1)) >> 4; // /16 + base = new (mm) char[dims]; + clear(); +} + + +void RegAlloc4::BoolMatrix::clear () +{ + memset(base, 0, dims); +} + + +//======================================================================================== +// Registers implementation +//======================================================================================== + + +// Parse input parameters (registers available) and build table of the regsiters +// available for allocalion ('registers'). +// +void RegAlloc4::Registers::parse (const char* params) +{ + if (params == 0 || strcmp(params, "ALL") == 0) + { +#ifdef _EM64T_ + push_back(Constraint(RegName_RAX) + |Constraint(RegName_RCX) + |Constraint(RegName_RDX) + |Constraint(RegName_RBX) + |Constraint(RegName_RSI) + |Constraint(RegName_RDI) + |Constraint(RegName_RBP) + |Constraint(RegName_R8) + |Constraint(RegName_R9) + |Constraint(RegName_R10) + |Constraint(RegName_R11) + |Constraint(RegName_R12)); +#else + push_back(Constraint(RegName_EAX) + |Constraint(RegName_ECX) + |Constraint(RegName_EDX) + |Constraint(RegName_EBX) + |Constraint(RegName_ESI) + |Constraint(RegName_EDI) + |Constraint(RegName_EBP)); +#endif + push_back(Constraint(RegName_XMM0) + |Constraint(RegName_XMM1) + |Constraint(RegName_XMM2) + |Constraint(RegName_XMM3) + |Constraint(RegName_XMM4) + |Constraint(RegName_XMM5) + |Constraint(RegName_XMM6) + |Constraint(RegName_XMM7)); + } + else + { + Constraint c; + for (Tokens t(params); t.scan(); ) + if (t.isWord()) + { + RegName r = getRegName(t.lex()); + if (r != RegName_Null) + c = Constraint(r); + + merge(c, true); + } + } + + assert(!empty()); + + for (unsigned i = 0; i != IRMaxRegKinds; ++i) + indexes[i] = -1; + + for (unsigned i = 0; i 
!= size(); ++i) + indexes[operator[](i).getKind()] = (int)i; +} + + +int RegAlloc4::Registers::merge (const Constraint& c, bool add) +{ + if (c.getMask() != 0) + { + for (unsigned i = 0; i != size(); ++i) + { + Constraint& r = operator[](i); + if (r.getKind() == c.getKind()) + { + r.setMask(r.getMask() | c.getMask()); + return (int)i; + } + } + + if (add) + push_back(c); + } + + return -1; +} + + +int RegAlloc4::Registers::index (const Constraint& c) const +{ + return indexes[c.getKind() & OpndKind_Reg]; +} + + +//======================================================================================== +// RegPress implementation +//======================================================================================== + +//Initialization for register pressure element +RegAlloc4::RegPress::RegPress (MemoryManager & mm) : regMasksList(mm) { + elementsNum = 0; +} + +//Add new constraint +void RegAlloc4::RegPress::add (RegMask mask) { + int maskSize = bitCount(mask); + assert(maskSize != 0 && maskSize <= IRMaxRegNamesSameKind); + + //Increase elements number + elementsNum++; + //Put the mask right after more strict one. 
+ if (!regMasksList.empty()) { + for (RegMasksList::iterator ite = regMasksList.begin(), end = regMasksList.end(); ite != end; ++ite) { + if (maskSize <= bitCount(*ite)) { + regMasksList.insert(ite, mask); + return; + } + } + } + //If list is empty or the constraint is less strict than existing one push the constraint to the + //end of the list + regMasksList.push_back(mask); +} + +//Remove constraint +void RegAlloc4::RegPress::remove (RegMask mask) { + assert(elementsNum > 0); + + //Decrease number of elements and zero the mask + elementsNum--; + for (RegMasksList::iterator ite = regMasksList.begin(), end = regMasksList.end(); ite != end; ++ite) { + if (*ite == mask) { + regMasksList.erase(ite); + return; + } + } + //Element wasn't found + assert(0); +} + +//Check consistence of the current set of masks +//This is simple algorithm for constraints propagation with linear +//complexity, it doesn't guarantee correct solution but works good +//for simple cases +boolean RegAlloc4::RegPress::isConsist () { + if (elementsNum > IRMaxRegNamesSameKind) { + return false; + } + + RegMask mask = 0; + int elements = 0; + if (!regMasksList.empty()) { + for (RegMasksList::const_iterator ite = regMasksList.begin(), end = regMasksList.end(); ite != end; ++ite) { + elements++; + //Merge masks + mask |= (*ite); + //Check that constraints are satisfied + if (bitCount(mask) < elements) { + return false; + } + } + } + return true; +} + + + +//======================================================================================== +// OpndxStack implementation +//======================================================================================== + + +void RegAlloc4::OpndxStack::push (Opndx* opndx) +{ + if (stack == 0) + stack = new (mm) StlVector(mm); + + stack->push_back(opndx); +} + + +void RegAlloc4::OpndxStack::pop () +{ + assert(stack != 0 && !stack->empty()); + stack->pop_back(); +} + + +RegAlloc4::Opndx* RegAlloc4::OpndxStack::top () const +{ + return (stack == 0 || 
stack->empty())? 0 : stack->back(); +} + + +bool RegAlloc4::OpndxStack::empty () const +{ + return stack == 0 || stack->empty(); +} + + +//======================================================================================== +// Opndx implementation +//======================================================================================== + + +void RegAlloc4::Opndx::note (Inst* inst, uint32 role, DefUse* twin) +{ + if (defuses.empty() || defuses.back().inst != inst) { + DefUse du; + du.inst = inst; + du.role = role; + defuses.push_back(du); + } else + defuses.back().role |= role; + + if (twin != 0) + defuses.back().role |= twin->role & (DefUse_Flags::Split | DefUse_Flags::Fork | DefUse_Flags::Join); +} + + +bool RegAlloc4::Opndx::active () const +{ + for (DefUses::const_iterator ptr = defuses.begin(), end = defuses.end(); ptr != end; ++ptr) + if ((ptr->role & DefUse_Flags::Split) == 0) + return true; + + return false; +} + + +void RegAlloc4::Opndx::computeCost () +{ + cost = 0; + + for (DefUses::iterator ptr = defuses.begin(), end = defuses.end(); ptr != end; ++ptr) { + CGNode* node = (CGNode*)ptr->inst->getNode(); + double excount = node->getExecCount() / node->getIRManager().getFlowGraph()->getEntryNode()->getExecCount(); + assert(excount > 0); + + ptr->cost = excount; + cost += excount; + } +} + + +void RegAlloc4::Opndx::summCost () +{ + cost = 0; + + for (DefUses::iterator ptr = defuses.begin(), end = defuses.end(); ptr != end; ++ptr) + cost += ptr->cost; +} + + +//======================================================================================== +// Graph implementation +//======================================================================================== + + +int RegAlloc4::Graph::add (Opndx* opndx) +{ + opndx->idx = opndxs.size(); + opndxs.push_back(opndx); + return opndx->idx; +} + + +void RegAlloc4::Graph::remap () +{ + unsigned opndcount = 0, + i; + Opndx* opndx; + Opnd* opnd; + + StlVector::iterator ptr, end = opndxs.end(); + for (ptr = 
opndxs.begin(); ptr != end; ++ptr) + if ((opndx = *ptr) != 0 && (opnd = opndx->opnd) != 0) + if ((i = opnd->getId()) > opndcount) + opndcount = i; + ++opndcount; + + opndmap.resize(opndcount); + for (i = 0; i != opndcount; ++i) + opndmap[i] = -1; + + unsigned x = 0; + for (ptr = opndxs.begin(); ptr != end; ++ptr, ++x) + if ((opndx = *ptr) != 0 && (opnd = opndx->opnd) != 0) { + i = opnd->getId(); + assert(opndmap[i] == -1); + opndmap[i] = x; + } +} + +void RegAlloc4::Graph::connect (int x1, int x2) +{ + Opndx* opndx1 = at(x1), + * opndx2 = at(x2); + if (opndx1->ridx == opndx2->ridx) { + opndx1->adjacents.push_back(x2); + opndx2->adjacents.push_back(x1); + } +} + + +int RegAlloc4::Graph::disconnect (int x) +{ +// Node to be disconnected + Opndx* opndx = at(x); + if (opndx->adjacents.empty()) + return 0; + + int disc = 0; + + for (Indexes::iterator k = opndx->adjacents.begin(); k != opndx->adjacents.end(); ++k) { + // this node is adjacent to the node to be disconnected + Opndx* adjopndx = at(*k); + if (!adjopndx->adjacents.empty()) { + moveNodes(adjopndx->adjacents, adjopndx->hiddens, x); + if (adjopndx->adjacents.empty()) + disc++; + } + } + + opndx->hiddens.splice(opndx->hiddens.begin(), opndx->adjacents); + + return ++disc; +} + + +void RegAlloc4::Graph::reconnect (int x) +{ +// Node to be reconnected + Opndx* opndx = at(x); + + for (Indexes::iterator k = opndx->hiddens.begin(); k != opndx->hiddens.end(); ++k) { + // this node was adjacent to the node to be reconnected + Opndx* adjopndx = at(*k); + moveNodes(adjopndx->hiddens, adjopndx->adjacents, x); + } + + opndx->adjacents.splice(opndx->adjacents.begin(), opndx->hiddens); +} + + +void RegAlloc4::Graph::moveNodes (Indexes& from, Indexes& to, int x) const +{ + Indexes::iterator i; + while ((i = find(from.begin(), from.end(), x)) != from.end()) + to.splice(to.begin(), from, i); +} + + +//======================================================================================== +// RegAlloc4 implementation 
+//======================================================================================== + + +void RegAlloc4::runImpl () +{ + getIRManager().fixEdgeProfile(); + + registers.parse(getArg("regs")); + DBGOUT("parameters: " << registers << endl;) + + getArg("SORT", flag_SORT = 2); + getArg("NEIGBH", flag_NEIGBH = true); + getArg("COALESCE", flag_COALESCE = true); + +#ifdef _DEBUG_REGALLOC4 + iop = 0; + if (isLogEnabled("cfg")) { + iop = &IOp::make(); + iop->xml_create(&log("cfg").out()); + } +#endif + + coalesceCount = 0; + int insertCount = instCount(getIRManager()); + + DBGOUT(endl << "passnb 1" << endl;) + if (buildGraph()) + { + pruneGraph(); + if (!assignRegs()) + {// second pass: "spill everywhere" +/*** + bool flag_SPILL; + getArg("SPILL", flag_SPILL = false); + if (flag_SPILL) + { + spillRegs(); + getIRManager().calculateLivenessInfo(); + + DBGOUT(endl << "passnb 2" << endl;) + buildGraph(); + pruneGraph(); + assignRegs(); + } +***/ + } + + insertCount = instCount(getIRManager()) - insertCount; + DBGOUT("insert count " << insertCount << endl;) + +#ifdef _DEBUG_REGALLOC4 + LogStream& xmlstream = log("xmldump"); + if (xmlstream.isEnabled()) + { + IRXMLDump xmldump(getIRManager(), &xmlstream.out()); + xmldump.write(); + } + + delete iop; +#endif + } + + count_coalesced += coalesceCount; + + SpillGen(); +} + + +bool RegAlloc4::verify (bool force) +{ + bool failed = false; + if (force || getVerificationLevel() >=2 ) + { + RegAllocCheck chk(getIRManager()); + if (!chk.run(false)) + failed = true; + if (!SessionAction::verify(force)) + failed = true; + } + + return !failed; +} + + +void RegAlloc4::SpillGen () +{ +/*** + bool runSpillGen = false; + + for (unsigned i = 0, opandcount = getIRManager().getOpndCount(); i != opandcount; ++i) + { + Opnd* opnd = getIRManager().getOpnd(i); + if (opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default).isNull()) + if (opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default).getKind() == 
OpndKind_Memory) + { + opnd->assignMemLocation(MemOpndKind_StackAutoLayout, getIRManager().getRegOpnd(STACK_REG), 0); + DBGOUT("assigned to mem " << *opnd << endl;) + } + else + runSpillGen = true; + } + + bool* spill_flag = new (getIRManager().getMemoryManager()) bool(runSpillGen); + getIRManager().setInfo("SpillGen", spill_flag); + DBGOUT("runSpillGen:" << runSpillGen << endl;) +***/ +} + + +bool RegAlloc4::buildGraph () +{ + static CountTime buildGraphTimer("ia32::RegAlloc4::buildGraph"); + AutoTimer tm(buildGraphTimer); + + const unsigned opandcount = getIRManager().getOpndCount(); + graph.clear(); + graph.reserve(opandcount); + +// Scan all the operands available and see if operand is already assigned +// or need to be assigned + + for (unsigned i = 0; i != opandcount; ++i) + { + Opnd* opnd = getIRManager().getOpnd(i); + + int ridx; + Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default); + if (loc.isNull()) + {// this operand is not allocated yet + loc = opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default); + if ((ridx = registers.index(loc)) != -1) + {// operand should be assigned to register + Opndx* opndx = new (mm) Opndx(mm); + opndx->opnd = opnd; + opndx->ridx = ridx; + opndx->avail = loc.getMask() & registers[ridx].getMask(); + opndx->nbavails = bitCount(opndx->avail); + assert(opndx->nbavails != 0); + opndx->cost = 1; + graph.add(opndx); + } + } + } + + if (graph.empty()) + return false; + + graph.remap(); + +//showGraph(); +//showOpands("initial", false); + + splitOpnds(); + normalize(); + graph.remap(); + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* opndx = graph[x]) + opndx->computeCost(); + +//showGraph("After normalize"); +//showOpands("normalize"); + + setupConstraints(); //??? 
+ setupSaturations(); + checkSaturations(); + + mergeOpndxs(); + checkSaturations(); + +//showGraph("Before spillOpndxs"); +//showOpands("merge"); + + spillOpndxs(); + cleanupSplits(); + + setupConstraints(); + +//showGraph("After cleanupSplits"); +//showOpands("cleanup"); + +// Create graph node for each register available (required befor processNodes) + + xregbase = graph.size(); // graph index of the first register available + graph.reserve(graph.size() + registers.size()); + + int ridx = 0; + for (Registers::iterator it = registers.begin(), end = registers.end(); it != end; ++it, ++ridx) + for (RegMask msk = it->getMask(), mk = 1; msk != 0; mk <<= 1) + if ((msk & mk) != 0) + { + msk ^= mk; + Opndx* opndx = new (mm) Opndx(mm); + opndx->opnd = 0; + opndx->ridx = ridx; + opndx->alloc = mk; + graph.add(opndx); + } + + graph.remap(); + graphsize = graph.size(); + BoolMatrix matrix(mm, graphsize); + +// Iterate over all instructions in CFG and calculate which operands +// are live simultaneously (result stored in matrix) + + processNodes(matrix); + +// Connect nodes that represent simultaneously live operands + + for (unsigned x1 = 1; x1 < graphsize; ++x1) + for (unsigned x2 = 0; x2 < x1; ++x2) + if (matrix.test(x1, x2)) + graph.connect(x1, x2); + +// Do iterative coalescing + + if (flag_COALESCE) + while (coalescing(matrix)) + /*nothing*/; + +//showGraph("Before prune"); +//showOpands("alloc"); + + getIRManager().getFlowGraph()->purgeEmptyNodes(); + return true; +} + +bool RegAlloc4::isForkNode (Node * node) { + assert(node->isBlockNode()); + if (node->getOutDegree() > 1) { + Edges::const_iterator edge_ptr, edge_end; + const Edges& outs = node->getOutEdges(); + uint32 outNodes = 0; + for (edge_ptr = outs.begin(), edge_end = outs.end(); edge_ptr != edge_end; ++edge_ptr) { + Node* dst_node = (*edge_ptr)->getTargetNode(); + if (dst_node->isBlockNode()) { + outNodes++; + } + } + if (outNodes > 1) { + return true; + } + } + return false; +} + +void RegAlloc4::splitOpnds 
() { + DBGOUT("Operand splitting" << endl;) + + Edges::const_iterator edge_ptr, + edge_end; + Opndx* opndx; + + const Nodes& nodes = getIRManager().getFlowGraph()->getNodesPostOrder(); + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it != end; ++it) { + Node* node = *it; + if (node->isBlockNode()) { + //Is it a fork node ? + if (isForkNode(node)) + { + const Edges& outs = node->getOutEdges(); + for (edge_ptr = outs.begin(), edge_end = outs.end(); edge_ptr != edge_end; ++edge_ptr) + { + Node* dst_node = (*edge_ptr)->getTargetNode(); + + // If destination node has several input edges, leave processing to join-node case + if (dst_node->getInDegree() > 1) + continue; + + if (dst_node->isDispatchNode()) + { + if (dst_node->getOutDegree() == 1) + dst_node = dst_node->getOutEdges().at(0)->getTargetNode(); + else + continue; + } + + if (!dst_node->isBlockNode()) { + continue; + } + + // Check operands that live on exit from the fork node and on entry to destination nodes + + BitSet::IterB ib(*getIRManager().getLiveAtEntry(dst_node)); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) { + if ((opndx = graph.getOpndx(i)) != 0 && opndx->isCandidate()) { + Opndx* split_opndx = splitOpnd(opndx); + // Insert copy instruction at the start of destination node + Inst* new_inst = insertCopy(true, dst_node, opndx->opnd); + split_opndx->note(new_inst, + Inst::OpndRole_Def | DefUse_Flags::Split | DefUse_Flags::Fork); + } + } + } + } + + //Is it a join node ? 
+ if (node->getInDegree() > 1) { + BitSet* lives = getIRManager().getLiveAtEntry(node); + BitSet::IterB ib(*lives); + + bool is_candidates = false; + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + { + Opnd* opnd = getIRManager().getOpnd(i); + opndx = graph.getOpndx(i); + if (opndx != 0 && opndx->isCandidate()) + { + is_candidates = true; + break; + } + } + if (!is_candidates) + continue; + + const Edges& ins = node->getInEdges(); + + Edges* edges = new Edges(mm); + edges->reserve(ins.size()); + for (edge_ptr = ins.begin(), edge_end = ins.end(); edge_ptr != edge_end; ++edge_ptr) + if ((*edge_ptr)->getSourceNode() != (*edge_ptr)->getTargetNode()) + edges->push_back(*edge_ptr); + + if (edges->size() < 2) + return; + + //Special processing for dispatch nodes and catch instruction + Inst* catch_inst = (Inst*)node->getFirstInst(); + if (catch_inst != 0 && catch_inst->hasKind(Inst::Kind_CatchPseudoInst)) + { + Opnd* catch_opnd = catch_inst->getOpnd(0); + lives->setBit(catch_opnd->getId()); + catch_inst->unlink(); + + for (edge_ptr = edges->begin(), edge_end = edges->end(); edge_ptr != edge_end; ++edge_ptr) { + Node* src_node = (*edge_ptr)->getSourceNode(), + * new_node = insertNode(*edge_ptr); + + Inst* new_inst = getIRManager().newCatchPseudoInst(catch_opnd); + new_node->prependInst(new_inst); + } + + edges->resize(ins.size()); + copy(ins.begin(), ins.end(), edges->begin()); + } + + for (edge_ptr = edges->begin(), edge_end = edges->end(); edge_ptr != edge_end; ++edge_ptr) { + Node* src_node = (*edge_ptr)->getSourceNode(); + // If source node has several output edges, split the edge + if (src_node->getOutDegree() > 1) + insertNode(*edge_ptr); + } + + // Check operands that live on entry to the join node + + ib.init(*lives); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) { + opndx = graph.getOpndx(i); + if (opndx != 0 && opndx->isCandidate()) { + Opndx* split_opndx = splitOpnd(opndx); + split_opndx->firstbb = node; + + for (edge_ptr = ins.begin(), 
edge_end = ins.end(); edge_ptr != edge_end; ++edge_ptr) { + Node* src_node = (*edge_ptr)->getSourceNode(); + // Insert copy instruction at the end of source node + Inst* new_inst = insertCopy(false, src_node, opndx->opnd); + split_opndx->note(new_inst, + Inst::OpndRole_Def | DefUse_Flags::Split | DefUse_Flags::Join); + } + } + } + } + } + } + + DominatorBuilder dombuild; + DominatorTree* domtree = dombuild.computeDominators(mm, getIRManager().getFlowGraph()); + domtreeWalk(domtree->getDominatorRoot(), 0); + + for (unsigned x = 0; x != graph.size(); ++x) { + if (Opndx* opndx = graph[x]) { + assert(opndx->opstack.empty()); + + if (opndx->splits && !opndx->defuses.empty()) { + Opndx* split_opndx = splitOpnd(opndx); + DefUses& split_defuses = split_opndx->defuses; + DefUses& root_defuses = opndx->defuses; + split_defuses.splice(split_defuses.end(), root_defuses, root_defuses.begin(), root_defuses.end()); + } + } + } +} + + +void RegAlloc4::domtreeWalk (DominatorNode* domnode, int depth) { +// List of all sub-operands defined in the processed node + StlVector actives(mm); + + Node* node = domnode->getNode(); + if (node->isBlockNode()) { + // Implement join-node split operands from join nodes + BitSet::IterB ib(*getIRManager().getLiveAtEntry(node)); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + if (Opndx* root_opndx = graph.getOpndx(i)) + if (root_opndx->splits != 0) + for (OpndxList::iterator ptr = root_opndx->splits->begin(), + end = root_opndx->splits->end(); + ptr != end; ++ptr) { + + Opndx* split_opndx = *ptr; + if (split_opndx->firstbb == node) { + root_opndx->opstack.push(split_opndx); + actives.push_back(root_opndx); + } + } + + for (Inst* inst = (Inst*)node->getFirstInst(), * inst_next; inst != 0; inst = inst_next) { + inst_next = inst->getNextInst(); + + if (inst->getMnemonic() == Mnemonic_MOV && inst->getOpnd(0) == inst->getOpnd(1)) + if (Opndx* root_opndx = graph.getOpndx(inst->getOpnd(0)->getId())) { + Opndx* split_opndx = 0; + DefUses::iterator 
du_ptr; + DefUse* du = 0; + if (root_opndx->splits != 0) + { + OpndxList::iterator ptr = root_opndx->splits->begin(), + end = root_opndx->splits->end(); + for (; ptr != end; ++ptr) { + DefUses& defuses = (*ptr)->defuses; + du_ptr = find(defuses.begin(), defuses.end(), inst); + if (du_ptr != defuses.end()) + { + du = &*du_ptr; + assert(du->role & Inst::OpndRole_Def); + assert(du->role & DefUse_Flags::Split); + assert(du->role & (DefUse_Flags::Fork | DefUse_Flags::Join)); + split_opndx = *ptr; + break; + } + } + } + + // special processing for copy/split MOV instruction + + if (split_opndx != 0) {// Register use-point of current split + Opndx* act = root_opndx->opstack.top(); + if (act == split_opndx) { + split_opndx->defuses.erase(du_ptr); + assert(!split_opndx->defuses.empty()); + inst->unlink(); + } else { + (act == 0 ? root_opndx : act)->note(inst, Inst::OpndRole_Use, du); + + // Setup new split + root_opndx->opstack.push(split_opndx); + actives.push_back(root_opndx); + } + continue; + } + } + + // all other instructions + + for (InstOpnds inops(inst, Inst::OpndRole_All, false); inops.hasMore(); inops.next()) { + if (Opndx* root_opndx = graph.getOpndx(inops.getOpnd()->getId())) { + Opndx* act = root_opndx->opstack.top(); + (act == 0 ? 
root_opndx : act)->note(inst, inops.getRole()); + } + } + } + } + + if (domnode->getChild() != 0) + domtreeWalk(domnode->getChild(), depth+1); + +// Popup sub-operands defined in the processed node + + for (StlVector::reverse_iterator ptr = actives.rbegin(), + end = actives.rend(); + ptr != end; ++ptr) { + Opndx* root_opndx = *ptr; + root_opndx->opstack.pop(); + } + + if (domnode->getSiblings() != 0) + domtreeWalk(domnode->getSiblings(), depth); +} + + +RegAlloc4::Opndx* RegAlloc4::splitOpnd (Opndx* root_opndx) { + Opndx* split_opndx = new Opndx(mm); + split_opndx->firstbb = 0; + split_opndx->ridx = root_opndx->ridx; + split_opndx->avail = root_opndx->avail; + split_opndx->nbavails = root_opndx->nbavails; + graph.add(split_opndx); + if (root_opndx->splits == 0) + root_opndx->splits = new (mm) OpndxList(mm); + root_opndx->splits->push_back(split_opndx); + return split_opndx; +} + + +void RegAlloc4::normalize () { + DBGOUT("Normalization" << endl;) + + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* root_opndx = graph[x]) + if (root_opndx->splits != 0 && !root_opndx->splits->empty()) { + for (OpndxList::iterator opndx_ptr = root_opndx->splits->begin(), + opndx_end = root_opndx->splits->end(); + opndx_ptr != opndx_end; ++opndx_ptr) { + + Opndx* split_opndx = *opndx_ptr; + assert(split_opndx); + assert(split_opndx->opnd == 0); + + split_opndx->opnd = getIRManager().newOpnd(root_opndx->opnd->getType(), + root_opndx->opnd->getConstraint(Opnd::ConstraintKind_Initial)); + split_opndx->ridx = root_opndx->ridx; + + for (DefUses::iterator ptr = split_opndx->defuses.begin(), + end = split_opndx->defuses.end(); + ptr != end; ++ptr) + { + DefUse& du = *ptr; + du.inst->replaceOpnd(root_opndx->opnd, split_opndx->opnd, du.role | Inst::OpndRole_ForIterator); + //DBGOUT("REPLACE " << *du.inst << (du.role & Inst::OpndRole_Def ? ":Def " : "") << (du.role & Inst::OpndRole_Use ? 
":Use " : "") << " " << *root_opndx->opnd << " -> " << *split_opndx->opnd<< " role:" << du.role << endl;) + } + } + } + + getIRManager().calculateLivenessInfo(); + +#ifdef _DEBUG_REGALLOC4 + RegAllocCheck chk(getIRManager()); + chk.run(false); +#endif +} + + +void RegAlloc4::setupSaturations () +{ + DBGOUT("Setup saturations" << endl;) + + int regsavail [IRMaxRegKinds], + regsavail0[IRMaxRegKinds], + regkinds = (int)registers.size(), + ridx; + + for (ridx = 0; ridx < regkinds; ridx++) + regsavail0[ridx] = bitCount(registers[ridx].getMask()); + + RegPress * regpress[IRMaxRegKinds]; + for (int i = 0; i < IRMaxRegKinds; i++) + regpress[i] = new(mm) RegPress(mm); + + Saturation* sp = 0; + + const uint32 iropnds = getIRManager().getOpndCount(); + BitSet lives(mm, iropnds); + + const Nodes& nodes = getIRManager().getFlowGraph()->getNodesPostOrder(); + Node* node; + for (Nodes::const_iterator it = nodes.begin(); it != nodes.end(); ++it) + if ((node = *it)->isBlockNode()) + { + // start with the operands at the block bottom + Inst* inst = (Inst*)node->getLastInst(); + if (inst == 0) + continue; + + getIRManager().getLiveAtExit(node, lives); + + for (ridx = 0; ridx < regkinds; ridx++) + regsavail[ridx] = regsavail0[ridx]; + + BitSet::IterB ib(lives); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + { + Opndx* opndx = graph.getOpndx(i); + //if (opndx != 0) + // --regsavail[opndx->ridx]; + +// if (opndx != 0) +// regpress[opndx->ridx].incr(opndx->avail); + if (opndx != 0) + regpress[opndx->ridx]->add(opndx->avail); + } + + // iterate over instructions towards the top of the block + for (;;) + { + for (ridx = 0; ridx != regkinds; ++ridx) + { + int regs = regsavail[ridx]; + + if (inst->getMnemonic() == Mnemonic_CALL) + { + OpndKind k = static_cast(registers[ridx].getKind()); + Constraint ci = static_cast(inst)->getCalleeSaveRegs(k); + regs -= regsavail0[ridx] - bitCount(registers[ridx].getMask() & ci.getMask()); + + Opndx* opndx; + if ((inst->getOpndRoles(0) & 
Inst::OpndRole::OpndRole_UseDef) == Inst::OpndRole::OpndRole_Def + && (opndx = graph.getOpndx(inst->getOpnd(0)->getId())) != 0 && opndx->ridx == ridx) + ++regs; + } + + //RegMask over = regpress[ridx].getOverflow(); +// if (regs < 0) + + // this is saturation point + if (regs < 0 || !regpress[ridx]->isConsist()) { +// if (regs < 0) { + Saturation* sp = new (mm) Saturation(mm); + sp->inst = inst; + sp->regsavail = regs; + saturations.push_back(sp); + + BitSet::IterB ib(lives); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + { + Opndx* opndx = graph.getOpndx(i); + if (opndx != 0 && opndx->ridx == ridx) + { + sp->lives.push_back(opndx); + if (opndx->saturations == 0) + opndx->saturations = new (mm) SaturationList(mm); + opndx->saturations->push_back(sp); + } + } + } + } + + if (inst->getPrevInst() == 0) + break; + + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (uint32 i = opnds.begin(); i != opnds.end(); i = opnds.next(i)) + { + Opnd* opnd = inst->getOpnd(i); + Opndx* opndx = graph.getOpndx(opnd->getId()); + if (opndx != 0) + { + int& avail = regsavail[opndx->ridx]; + bool was = lives.getBit(opnd->getId()); + if (was && inst->isLiveRangeEnd(i)) + { + lives.setBit(opnd->getId(), false); + ++avail; + //regpress[opndx->ridx].decr(opndx->avail); + regpress[opndx->ridx]->remove(opndx->avail); + } + else if (!was && inst->isLiveRangeStart(i)) + { + lives.setBit(opnd->getId(), true); + --avail; + //regpress[opndx->ridx].incr(opndx->avail); + regpress[opndx->ridx]->add(opndx->avail); + } + } + } + + inst = inst->getPrevInst(); + } +/* +#ifdef _DEBUG_REGALLOC4 + int work[IRMaxRegKinds]; + for (ridx = 0; ridx != regkinds; ++ridx) + work[ridx] = bitCount(registers[ridx].getMask()); + + ib.init(lives); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + { + Opnd* opnd = getIRManager().getOpnd(i); + ridx = registers.index(opnd->getConstraint(Opnd::ConstraintKind_Initial, OpndSize_Default)); + if (ridx != -1) + --work[ridx]; + } + + for (ridx = 0; ridx != 
regkinds; ++ridx) + assert(work[ridx] == regsavail[ridx]); +#endif +*/ + } + +#ifdef _DEBUG_REGALLOC4 + int saturation_count = 0; + log(LogStream::DBG) << "Saturation points" << endl; + for (SaturationList::iterator si = saturations.begin(); si != saturations.end(); ++si) + { + Saturation* sp = *si; + Sep s; + log(LogStream::DBG).out() << " " << *sp->inst << " regs:" << sp->regsavail /**<< " over:" << hex << sp->overflow << dec **/ << " "; + for (OpndxList::iterator op = sp->lives.begin(); op != sp->lives.end(); ++op) + log(LogStream::DBG).out() << s << *(*op)->opnd; + log(LogStream::DBG) << endl; + ++saturation_count; + } + log(LogStream::DBG) << " total:" << saturation_count << endl; +#endif +} + + +void RegAlloc4::checkSaturations () +{ +#ifdef _DEBUG_REGALLOC4 + DBGOUT("checkSaturations" << endl;) + + const int opndx_count = graph.size(), + saturation_count = saturations.size(); + + SaturationList::iterator ptr = saturations.begin(), + end = saturations.end(); + + int k; + + StlVector matrix(mm, saturation_count); + for (k = 0; k != saturation_count; ++k, ++ptr) + { + Saturation* sp = *ptr; + sp->idx = k; + matrix[k] = new (mm) BitSet(mm, opndx_count); + } + + assert(ptr == end); + + for (int x = 0; x != opndx_count; ++x) + if (Opndx* opndx = graph[x]) + { + assert(opndx->idx == x); + if (opndx->saturations) + for (ptr = opndx->saturations->begin(), + end = opndx->saturations->end(); + ptr != end; ++ptr) + { + Saturation* sp = *ptr; + BitSet* bsp = matrix.at(sp->idx); + assert(!bsp->getBit(x)); + bsp->setBit(x, true); + } + } + + BitSet work(mm, opndx_count); + for (ptr = saturations.begin(), + end = saturations.end(), + k = 0; + ptr != end; ++ptr, ++k) + { + Saturation* sp = *ptr; + + assert(sp->inst->getNode()); + + work.clear(); + for (OpndxList::iterator op_ptr = sp->lives.begin(), + op_end = sp->lives.end(); + op_ptr != op_end; ++op_ptr) + { + Opndx* opndx = *op_ptr; + assert(!work.getBit(opndx->idx)); + work.setBit(opndx->idx, true); + } + + 
assert(work.isEqual(*matrix[k])); + } +#endif +} + + +void RegAlloc4::mergeOpndxs () +{ + DBGOUT("Merge opnds" << endl;) + + int merged = 0; + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* root_opndx = graph[x]) + if (root_opndx->splits != 0 && !root_opndx->splits->empty()) + { + for (OpndxList::iterator opndx_ptr = root_opndx->splits->begin(), + opndx_end = root_opndx->splits->end(), + opndx_nxt; + opndx_ptr != opndx_end; opndx_ptr = opndx_nxt) + { + opndx_nxt = opndx_ptr; ++opndx_nxt; + Opndx* opndx0 = *opndx_ptr; + + DefUses& defuses0 = opndx0->defuses; + for (DefUses::iterator ptr0 = defuses0.begin(), nxt0; + ptr0 != defuses0.end(); ptr0 = nxt0) + { + nxt0 = ptr0; ++nxt0; + + if ((ptr0->role & Inst::OpndRole_Use) && (ptr0->role & DefUse_Flags::Split)) + { + assert(ptr0->inst->getMnemonic() == Mnemonic_MOV); + Opndx* opndx1 = graph.getOpndx(ptr0->inst->getOpnd(0)->getId()); + assert(opndx1 != 0); + assert(opndx1 != opndx0); + + if (shouldMerge(opndx0, opndx1, &*ptr0)) + { + merge(opndx0, opndx1); + root_opndx->splits->remove(opndx1); + nxt0 = defuses0.begin(); // restart iteration + ++merged; + } + } + } + } + } + + DBGOUT("merged " << merged << endl;) + + if (merged != 0) + getIRManager().calculateLivenessInfo(); +} + + +// Check possibility of merging (coalescing) of two sub-operannds of the same root-operands. +// In the case of positive decision, the second sub-operands (opndx1) will be removed. + + +#define DLESS(a,b) (a - b) < 0.0001*(a + b) + +bool RegAlloc4::shouldMerge (Opndx* opndx0, Opndx* opndx1, DefUse* du) const +{ + //If number of saturation points is 0 - operand is + //insignificant (i.e. 
sig = false) + const bool sig0 = (opndx0->significance() != 0), + sig1 = (opndx1->significance() != 0); + + if (!sig0 && !sig1) + //return false; + return true; + + const bool act0 = opndx0->active(), + act1 = opndx1->active(); + +// Rule 1 + + if (sig0 && !act0 && + sig1 && !act1) + { + DBGOUT(" rule 1 ";) + return true; + } + +// Rule 2 + + if ((sig0 && !sig1) || (!sig0 && sig1)) + { + double cost_merged = opndx0->cost + opndx1->cost; + for (DefUses::iterator ptr = opndx0->defuses.begin(), + end = opndx0->defuses.end(); + ptr != end; ++ptr) + if (ptr->role & DefUse_Flags::Split) + { + int k = (ptr->role & Inst::OpndRole_Use) ? 0 : 1; + if (opndx1->opnd == ptr->inst->getOpnd(k)) + { + cost_merged -= ptr->cost * 2.0; + } + } + + double cost = sig0 ? opndx0->cost : opndx1->cost; + DBGOUT(" rule 2 ? " << cost_merged << " : " << cost << endl;) + if (DLESS(cost_merged, cost)) // (cost_merged <= cost) + { + DBGOUT(" rule 2 ";) + return true; + } + else + return false; + } + +// Rule 3 + + CGNode* node = (CGNode*)du->inst->getNode(); + ControlFlowGraph* cfg = getIRManager().getFlowGraph(); + double excount0 = cfg->getEntryNode()->getExecCount(), + excount1 = node->getExecCount() / excount0; + + if (du->role & DefUse_Flags::Fork) + { + assert(node->getInDegree() == 1); + node = (CGNode*)node->getInEdges().front()->getSourceNode(); + } + else + { + assert(node->getOutDegree() == 1); + node = (CGNode*)node->getOutEdges().front()->getTargetNode(); + } + + double excount2 = node->getExecCount() / excount0, + exdiff = max(excount1, excount2) / 2.0; + + if (abs(excount1 - excount2) <= exdiff) + { + DBGOUT(" rule 3 ";) + return true; + } + +// No rules applicable + + return false; +} + + +void RegAlloc4::merge (Opndx* opndx0, Opndx* opndx1) +{ + DBGOUT(" merging (" << opndx0->idx << ") " << *opndx0->opnd << " with (" << opndx1->idx << ") " << *opndx1->opnd << endl;) + + opndx0->avail &= opndx1->avail; + opndx0->nbavails = bitCount(opndx0->avail); + + DefUses& defuses1 = 
opndx1->defuses; + for (DefUses::iterator ptr = defuses1.begin(), + end = defuses1.end(); + ptr != end; ++ptr) + ptr->inst->replaceOpnd(opndx1->opnd, + opndx0->opnd, + ptr->role | Inst::OpndRole_ForIterator); + + + DefUses& defuses0 = opndx0->defuses; + defuses0.splice(defuses0.end(), defuses1, defuses1.begin(), defuses1.end()); + + StlVector removed(mm); + + for (DefUses::iterator ptr = defuses0.begin(), + end = defuses0.end(), + nxt; + ptr != end; ptr = nxt) + { + nxt = ptr; ++nxt; + if (ptr->role & DefUse_Flags::Split) + { + Inst* inst = ptr->inst; + assert(inst->getMnemonic() == Mnemonic_MOV); + Opnd* opnd = inst->getOpnd(0); + if (opnd == inst->getOpnd(1)) + if (opnd == opndx0->opnd || opnd == opndx1->opnd) + { + defuses0.erase(ptr); + if (inst->getNode()) + { + removed.push_back(inst); + inst->unlink(); + DBGOUT(" removing " << *inst << endl;) + } + } + } + } + + graph.at(opndx1->idx) = 0; + opndx0->summCost(); + + if (saturations.empty()) + return; + + if (opndx1->saturations != 0) + { + if (opndx0->saturations == 0) + opndx0->saturations = new (mm) SaturationList(mm); + + SaturationList& sats0 = *opndx0->saturations, + & sats1 = *opndx1->saturations; + + + for (SaturationList::iterator ptr = sats1.begin(), + end = sats1.end(), + nxt; + ptr != end; ptr = nxt) + { + nxt = ptr; ++nxt; + + Saturation* sp = *ptr; + OpndxList::iterator op_ptr = find(sp->lives.begin(), sp->lives.end(), opndx1); + assert(op_ptr != sp->lives.end()); + + SaturationList::iterator sp_ptr = find(sats0.begin(), sats0.end(), sp); + if (sp_ptr == sats0.end()) + { + *op_ptr = opndx0; + sats0.splice(sats0.end(), sats1, ptr); + } + else + { + sp->lives.erase(op_ptr); + if (++sp->regsavail >= 0) + { + DBGOUT(" SP relaxed " << *sp->inst << endl;) + } + } + } + } + + for (SaturationList::iterator ptr = saturations.begin(), + end = saturations.end(), + nxt; + ptr != end; ptr = nxt) + { + nxt = ptr; ++nxt; + Saturation* sp = *ptr; + if (find(removed.begin(), removed.end(), sp->inst) != 
removed.end()) + { + DBGOUT(" removing SP at " << *sp->inst << endl;) + saturations.erase(ptr); + + for (OpndxList::iterator ptr = sp->lives.begin(), + end = sp->lives.end(); + ptr != end; ++ptr) + { + Opndx* opndx = *ptr; + opndx->saturations->remove(sp); + if (opndx->saturations->empty()) + { + opndx->saturations = 0; + DBGOUT(" cleared " << *opndx << endl;) + } + } + } + } +} + + +void RegAlloc4::spillOpndxs () +{ + int spilled = 0; + + OpndxList candidates(mm); + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* opndx = graph[x]) + if (opndx->significance() > 0) + candidates.push_back(opndx); + + Opndx* spill_opndx; + +// First pass: spill inactive operands + + DBGOUT("spillOpnds 1" << endl;) + while (!saturations.empty()) + { + spill_opndx = 0; + + for (OpndxList::iterator ptr = candidates.begin(), + end = candidates.end(); + ptr != end; ++ptr) + { + Opndx* opndx = *ptr; + assert(opndx->significance() > 0); + if (!opndx->active()) + { + if (spill_opndx == 0 || opndx->significance() > spill_opndx->significance()) + spill_opndx = opndx; + } + } + + if (spill_opndx == 0) + break; + + spillOpndx(spill_opndx, candidates); + ++spilled; + } + +// Second pass: spill active operands + + DBGOUT("spillOpnds 2" << endl;) + while (!saturations.empty()) + { + spill_opndx = 0; + double work = DBL_MAX, w; + + for (OpndxList::iterator ptr = candidates.begin(), + end = candidates.end(); + ptr != end; ++ptr) + { + Opndx* opndx = *ptr; + assert(opndx->significance() > 0); + if ((w = opndx->cost/opndx->significance()) < work) + { + spill_opndx = opndx; + work = w; + } + } + + if (spill_opndx == 0) + break; + + spillOpndx(spill_opndx, candidates); + ++spilled; + } + + DBGOUT("spilled " << spilled << endl;) +} + + +void RegAlloc4::spillOpndx (Opndx* opndx, OpndxList& candidates) +{ + DBGOUT(" spill " << *opndx << endl;) + opndx->spill = true; + candidates.remove(opndx); + + if (SaturationList* sps = opndx->saturations) + for (SaturationList::iterator ptr = sps->begin(), + 
end = sps->end(), + nxt; + ptr != end; ptr = nxt) + { + nxt = ptr; ++nxt; + Saturation* sp = *ptr; + sp->lives.remove(opndx); + if (++sp->regsavail >= 0) + {// discard this saturation point + saturations.erase(ptr); + for (OpndxList::iterator ptr = sp->lives.begin(), + end = sp->lives.end(); + ptr != end; ++ptr) + { + Opndx* opndx = *ptr; + opndx->saturations->remove(sp); + if (opndx->significance() == 0) + candidates.remove(opndx); + } + } + } +} + + +void RegAlloc4::cleanupSplits () +{ + DBGOUT("Cleanup splits" << endl;) + int opnd_count = 0, + split_count = 0; + + saturations.clear(); + + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* opndx = graph[x]) + { + if (opndx->splits) + { + OpndxList* splits = opndx->splits; + for (OpndxList::iterator ptr = splits->begin(), + end = splits->end(), + nxt; + ptr != end; ptr = nxt) + { + nxt = ptr; ++nxt; + Opndx* split_opndx = *ptr; + if (!split_opndx->spill) + { + merge(opndx, split_opndx); + splits->erase(ptr); + } + else + ++split_count; + } + //opndx->splits = 0; + + if (opndx->defuses.empty()) + { + graph[x] = 0; + DBGOUT(" dropped " << *opndx << endl;) + } + } + ++opnd_count; + } + DBGOUT(" opnds:" << opnd_count << " splits:" << split_count << endl;) +} + + +void RegAlloc4::setupConstraints () +{ + DBGOUT("Calculating constraints" << endl;) + + getIRManager().calculateLivenessInfo(); + ConstraintsResolver cr(true); + cr.initz(); + cr.process(&getIRManager()); + + for (unsigned x = 0; x != graph.size(); ++x) + if (Opndx* opndx = graph[x]) + if (Opnd* opnd = opndx->opnd) + { + Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location, OpndSize_Default); + if (loc.isNull()) + {// this operand is not allocated yet + loc = opnd->getConstraint(Opnd::ConstraintKind_Calculated, OpndSize_Default); + int ridx = registers.index(loc); + if (ridx != -1) + {// operand should be assigned to register + opndx->ridx = ridx; + opndx->avail = loc.getMask() & registers[ridx].getMask(); + opndx->nbavails = 
bitCount(opndx->avail); + assert(opndx->nbavails != 0); + } + } + } +} + + +// Iterate over all instructions in CFG and calculate which operands +// are live simultaneously (result stored in matrix) +// +void RegAlloc4::processNodes (BoolMatrix& matrix) { + BitSet lives(mm, getIRManager().getOpndCount()); + + const Nodes& nodes = getIRManager().getFlowGraph()->getNodesPostOrder(); + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it != end; ++it) { + Node* node = *it; + if (node->isBlockNode()) { + Inst* inst = (Inst*)node->getLastInst(); + if (inst == 0) + continue; + + // start with the operands at the block bottom + getIRManager().getLiveAtExit(node, lives); + + // iterate over instructions towards the top of the block + for (;;) { + processInst(inst, lives, matrix /*, excount*/); + + if (inst->getPrevInst() == 0) + break; + + getIRManager().updateLiveness(inst, lives); + inst = inst->getPrevInst(); + } + } +#ifdef _SKIP_CATCHED + else if (node->isDispatchNode()) { + BitSet* tmp = getIRManager().getLiveAtEntry(node); + BitSet::IterB ib(*tmp); + Opndx* opndx; + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + if ((opndx = graph.getOpndx(i)) != 0) { + opndx->ignore = true; + DBGOUT("catched " << *opndx << endl;) + } + } +#endif + } +} + + +void RegAlloc4::processInst (Inst* inst, BitSet& lives, BoolMatrix& matrix) +{ + int defx = -1; + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) { + uint32 role = inst->getOpndRoles(it); + Opnd* opnd = inst->getOpnd(it); + + // For each operand def, look at the all live operand + if (role & Inst::OpndRole_Def) + lookLives(opnd, lives, matrix); + + Opndx* opndx = graph.getOpndx(opnd->getId()); + if (opndx != 0) { + if (role & Inst::OpndRole_Def) { + defx = opndx->idx; + } else if (flag_NEIGBH && defx != -1 && !lives.getBit(opnd->getId())) { + Opndx* defopndx = graph.at(defx); + defopndx->neighbs.push_back(opndx->idx); + 
+ opndx->neighbs.push_back(defx); + } + } + } +} + + +// Look for operands live at defining point +// +void RegAlloc4::lookLives (Opnd* opnd, BitSet& lives, BoolMatrix& matrix) +{ + int i = opnd->getId(), + x; + + if ((x = graph.getIdx(i)) == -1 && (x = findNode(opnd)) == -1) + return; + + BitSet::IterB bsk(lives); + int k, y; + for (k = bsk.getNext(); k != -1; k = bsk.getNext()) + if (k != i) + if ((y = graph.getIdx(k)) != -1 || (y = findNode(irManager->getOpnd(k))) != -1) + matrix.set(x, y); +} + + +int RegAlloc4::findNode (Opnd* opnd) const +{ + Constraint loc = opnd->getConstraint(Opnd::ConstraintKind_Location); + if (!loc.isNull()) { + int ridx = registers.index(loc); + RegMask msk = loc.getMask(); + + for (unsigned x = xregbase; x != graph.size(); ++x) { + const Opndx* opndx = graph[x]; + assert(opndx->alloc != 0); + if (opndx->ridx == ridx && opndx->alloc == msk) + return x; + } + } + + return -1; +} + + +void RegAlloc4::showGraph (const char* hdr) +{ +#ifdef _DEBUG_REGALLOC4 + log(LogStream::DBG) << "--- graph " << hdr << endl + << graph + << "---------" << endl; + log(LogStream::DBG).flush(); +#endif +} + + +void RegAlloc4::showOpands (const char* hdr, bool du) +{ +#ifdef _DEBUG_REGALLOC4 + if (iop == 0) + return; + + iop->clear(); + + ControlFlowGraph* cfg = getIRManager().getFlowGraph(); + double excount0 = cfg->getEntryNode()->getExecCount(); + + const Nodes& nodes = cfg->getNodesPostOrder(); + Node* node; + for (Nodes::const_iterator it = nodes.begin(); it != nodes.end(); ++it) + { + node = *it; + int insts = node->getInstCount(); + iop->add_node(node->getId(), insts == 0 ? 
1 : insts, 0, node->getExecCount() / excount0); + } + + for (Nodes::const_iterator it = nodes.begin(); it != nodes.end(); ++it) + { + node = *it; + const Edges& outs = node->getOutEdges(); + for (Edges::const_iterator edge_ptr = outs.begin(), edge_end = outs.end(); edge_ptr != edge_end; ++edge_ptr) + { + Node* dst_node = (*edge_ptr)->getTargetNode(); + iop->add_edge(node->getId(), dst_node->getId()); + } + } + + Opndx* opndx; + for (unsigned x = 0; x != graph.size(); ++x) + if ((opndx = graph[x]) != 0 && opndx->opnd != 0) + { + uint32 opid = opndx->opnd->getFirstId(); + iop->add_opand(opid); + + if (opndx->splits != 0) + for (OpndxList::iterator ptr = opndx->splits->begin(), + end = opndx->splits->end(); + ptr != end; ++ptr) + { + Opndx* split_opndx = *ptr; + iop->add_opand(split_opndx->opnd->getFirstId(), opid); + } + } + + if (du) + {// use graph/opndx to retrive du chains + getIRManager().indexInsts(); + + for (unsigned x = 0; x != graph.size(); ++x) + if ((opndx = graph[x]) != 0 && opndx->opnd != 0) + { + uint32 opid = opndx->opnd->getFirstId(); + + for (DefUses::iterator ptr = opndx->defuses.begin(), end = opndx->defuses.end(); ptr != end; ++ptr) + { + CGNode* node = (CGNode*)ptr->inst->getNode(); + + unsigned flags = 0; + uint32 role = ptr->role; + if (role & Inst::OpndRole_Def) + flags |= IOp::DEF; + if (role & Inst::OpndRole_Use) + flags |= IOp::USE; + if (role & DefUse_Flags::Split) + flags |= IOp::SPLIT; + + int y = ptr->inst->getIndex() - ((Inst*)node->getFirstInst())->getIndex(); + iop->add_opand_du(opid, node->getId(), y, (IOp::OpFlag)flags); + } + } + } + else + { + for (Nodes::const_iterator it = nodes.begin(); it != nodes.end(); ++it) + { + node = *it; + if (node->isBlockNode()) + { + int y = 0; + for (Inst* inst = (Inst*)node->getFirstInst(); inst != 0; inst = inst->getNextInst(), ++y) + { + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) + { + Opnd* opnd = 
inst->getOpnd(it); + if (graph.getOpndx(opnd->getId()) != 0) + { + uint32 role = inst->getOpndRoles(it); + unsigned flags = 0; + if (role & Inst::OpndRole_Def) + flags |= IOp::DEF; + if (role & Inst::OpndRole_Use) + flags |= IOp::USE; + + iop->add_opand_du(opnd->getFirstId(), node->getId(), y, (IOp::OpFlag)flags); + } + } + } + } + } + } + + BitSet lives(mm, getIRManager().getOpndCount()); + + for (Nodes::const_iterator it = nodes.begin(); it != nodes.end(); ++it) + { + node = *it; + BitSet::IterB ib(*getIRManager().getLiveAtEntry(node)); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + if (Opndx* opndx = graph.getOpndx(i)) + iop->add_opand_du(opndx->opnd->getFirstId(), node->getId(), -1, IOp::LIVE_AT_ENTRY); + + getIRManager().getLiveAtExit(node, lives); + ib.init(lives); + for (int i = ib.getNext(); i != -1; i = ib.getNext()) + if (Opndx* opndx = graph.getOpndx(i)) + iop->add_opand_du(opndx->opnd->getFirstId(), node->getId(), -1, IOp::LIVE_AT_EXIT); + } + + iop->xml_write(hdr); +#endif +} + + +bool RegAlloc4::coalescing (BoolMatrix& matrix) +{ + //static CountTime coalescingTimer("ia32::RegAlloc4::coalescing"); + //AutoTimer tm(coalescingTimer); + + int x0, x1; + + const Nodes& nodes = irManager->getFlowGraph()->getNodesPostOrder(); + for (Nodes::const_iterator it = nodes.begin(), end = nodes.end(); it!=end; ++it) + { + Node* node = *it; + if (node->isBlockNode()) + for (const Inst* inst = (Inst*)node->getLastInst(); inst != 0; inst = inst->getPrevInst()) + if (inst->getMnemonic() == Mnemonic_MOV) + if ((x0 = graph.getIdx(inst->getOpnd(0)->getId())) != -1 && + (x1 = graph.getIdx(inst->getOpnd(1)->getId())) != -1 && + x0 != x1 && !matrix.test(x0, x1)) + { + Opndx* opndx0 = graph.at(x0), + * opndx1 = graph.at(x1); + + RegMask avail = opndx0->avail & opndx1->avail; + unsigned nbavails = bitCount(avail); + + if (opndx0->ridx != opndx1->ridx || nbavails == 0) + continue; + + //if (opndx0.opnd->getSize() != opndx1.opnd->getSize()) + // continue; + + Type* t0 = 
opndx0->opnd->getType(), + * t1 = opndx1->opnd->getType(); + + //if ((t0->isManagedPtr() || t0->isObject()) != (t1->isManagedPtr() || t1->isObject())) + // continue; + + if (!Type::mayAlias(&irManager->getTypeManager(), t0, t1)) + continue; + + //DBGOUT("coalesce candidates (" << x0 << ") & (" << x1 << ") " << *inst << endl;) + + unsigned xdegree = 0, // estimated degree of the coalesced node + xcount = 0; // number of neighbours with degree >= k + + xdegree = opndx0->adjacents.size() + opndx1->adjacents.size() + - duplicates(opndx1->adjacents, matrix, x0, x1); + + for (Indexes::iterator ptr = opndx0->adjacents.begin(), end = opndx0->adjacents.end(); ptr != end; ++ptr) + { + Indexes& ixs = graph.at(*ptr)->adjacents; + unsigned ndegree = ixs.size() - duplicates(ixs, matrix, x0, x1); + if (ndegree >= nbavails) + if (++xcount >= nbavails) + break; + } + for (Indexes::iterator ptr = opndx1->adjacents.begin(), end = opndx1->adjacents.end(); ptr != end; ++ptr) + if (!matrix.test(*ptr, x0)) + { + Indexes& ixs = graph.at(*ptr)->adjacents; + unsigned ndegree = ixs.size(); + if (ndegree >= nbavails) + if (++xcount >= nbavails) + break; + } + + //DBGOUT("xdegree:" << xdegree << " xcount:" << xcount << endl;) + + if (xcount >= nbavails || xdegree >= nbavails) + continue; + + coalesce (matrix, opndx0, opndx1); + return true; + } + } + + return false; +} + + +// Colalesce graph nodes (x0) and (x1) and the corresponding operands. +// Node (x1) not to be used anymore, (x0) must be used unstead. +// Note that (x1) remains in the graph (must be ignored) +// +void RegAlloc4::coalesce (BoolMatrix& matrix, Opndx* opndx0, Opndx* opndx1) +{ + merge(opndx0, opndx1); + + graph.setIdx(opndx1->opnd->getId(), opndx0->idx); // ??? 
+ + Indexes tmp(mm); // list of trash indexes + + for (Indexes::iterator ptr = opndx1->adjacents.begin(), end = opndx1->adjacents.end(); ptr != end;) + { + Indexes::iterator ptr_next = ptr; + ++ptr_next; + + int x = *ptr; + assert(matrix.test(x, opndx1->idx)); + matrix.clear(x, opndx1->idx); + + Opndx* opndx = graph.at(x); + Indexes::iterator ptrx = find(opndx->adjacents.begin(), opndx->adjacents.end(), opndx1->idx); + assert(ptrx != opndx->adjacents.end()); + + if (matrix.test(x, opndx0->idx)) + {// disconnect (x1 - x) + tmp.splice(tmp.end(), opndx1->adjacents, ptr); + tmp.splice(tmp.end(), opndx->adjacents, ptrx); + } + else + {// connect (x0 - x) + matrix.set(x, opndx0->idx); + opndx0->adjacents.splice(opndx0->adjacents.end(), opndx1->adjacents, ptr); // x0 -> x + *ptrx = opndx0->idx; // x -> x0 + } + + ptr = ptr_next; + } + + assert(opndx1->adjacents.empty()); + + ++coalesceCount; +} + + +int RegAlloc4::duplicates (RegAlloc4::Indexes& list, RegAlloc4::BoolMatrix& matrix, int x0, int x1) +{ + int count = 0; + + for (RegAlloc4::Indexes::iterator ptr = list.begin(), end = list.end(); ptr != end; ++ptr) + if (*ptr != x0 && *ptr != x1) + if (matrix.test(*ptr, x0) && matrix.test(*ptr, x1)) + ++count; + + return count; +} + + +struct RegAlloc4::sortRule1 +{ + const RegAlloc4::Graph& graph; + const unsigned int rule; + + sortRule1 (const RegAlloc4::Graph& g, unsigned int r) :graph(g), rule(r) {} + + bool operator () (int x1, int x2) + { + const RegAlloc4::Opndx* opndx1 = graph.at(x1), + * opndx2 = graph.at(x2); + + return rule == 1 ? 
opndx1->cost > opndx2->cost + : opndx1->cost < opndx2->cost; + } +}; + + +void RegAlloc4::pruneGraph () +{ + static CountTime pruneGraphTimer("ia32::RegAlloc4::pruneGraph"); + AutoTimer tm(pruneGraphTimer); + + DBGOUT(endl << "pruneGraph"<< endl;) + +// Calculate number of nodes that should be pruned off the graph + int nbnodes = 0; + for (unsigned i = 0; i != graphsize; ++i) + if (shouldPrune(graph.at(i))) + nbnodes++; + + StlVector tmp(mm); + + nstack.reserve(nbnodes); + while (nbnodes > 0) + { + // Apply degree < R rule + + if (flag_SORT == 0) + for (bool succ = false; !succ;) { + succ = true; + for (unsigned i = 0; i != graphsize; ++i) { + Opndx* opndx = graph.at(i); + if (shouldPrune(opndx)) { + const unsigned n = opndx->adjacents.size(); + if (n != 0 && n < opndx->nbavails) { + nbnodes -= graph.disconnect(i); + nstack.push_back(i); + succ = false; + //DBGOUT(" rule#1 (" << i << ")" << endl;) + } + } + } + } + else + for (bool succ = false; !succ;) { + succ = true; + tmp.resize(0); + + for (unsigned i = 0; i != graphsize; ++i) { + Opndx* opndx = graph.at(i); + if (shouldPrune(opndx)) { + const unsigned n = opndx->adjacents.size(); + if (n != 0 && n < opndx->nbavails) + tmp.push_back(i); + } + } + + if (tmp.size() != 0) { + if (tmp.size() > 1) + sort(tmp.begin(), tmp.end(), sortRule1(graph, flag_SORT)); + + for (StlVector::iterator it = tmp.begin(); it != tmp.end(); ++it) { + nbnodes -= graph.disconnect(*it); + nstack.push_back(*it); + } + + succ = false; + } + } + + // Apply degree >= R rule + + if (nbnodes > 0) { + int x = -1, n; + double cost = 0, w; + + // Find some node to disconnect + for (unsigned i = 0; i != graphsize; ++i) { + Opndx* opndx = graph.at(i); + if (shouldPrune(opndx)) + if ((n = (int)opndx->adjacents.size()) != 0) { + w = opndx->cost/(double)n; + if (x == -1 || w < cost) { + cost = w, + x = i; + } + } + } + + assert(x != -1); + if (x != -1) { + nbnodes -= graph.disconnect(x); + nstack.push_back(x); + } + } + } +} + + +bool 
RegAlloc4::shouldPrune (const Opndx* opndx) const +{ + return opndx != 0 && !opndx->spill && opndx->alloc == 0 && !opndx->adjacents.empty(); +} + + +bool RegAlloc4::assignRegs () +{ + DBGOUT("assignRegs" << endl;) + + static CountTime assignRegsTimer("ia32::RegAlloc4::assignRegs"); + AutoTimer tm(assignRegsTimer); + + while (!nstack.empty()) { + int x = nstack.back(); + nstack.pop_back(); + + Opndx* opndx = graph.at(x); + graph.reconnect(x); + if (opndx->alloc == 0) { + DBGOUT("(" << x << ")" << endl;) + opndx->spill = !assignReg(opndx); + } + } + + int spilled = 0; + double spill_cost = 0; + + for (unsigned x = 0; x != graphsize; ++x) { + Opndx* opndx = graph.at(x); + if (opndx != 0) { + if (opndx->alloc == 0 && !opndx->spill) { + DBGOUT("(" << x << ")" << endl;) + opndx->spill = !assignReg(opndx); + } + + if (opndx->spill) { + ++spilled; + spill_cost += opndx->cost; + } + } + } + + DBGOUT("spilled " << spilled << " operands, cost " << spill_cost << endl;) + + return spilled == 0; +} + + +bool RegAlloc4::assignReg (Opndx* opndx) +{ + RegMask alloc = 0; + + for (Indexes::iterator i = opndx->adjacents.begin(); i != opndx->adjacents.end(); ++i) { + Opndx* opndz = graph.at(*i); + if (opndz != 0 && opndz->ridx == opndx->ridx) + alloc |= opndz->alloc; + } + + if ((alloc = opndx->avail & ~alloc) == 0) { + DBGOUT(" assign " << *opndx->opnd << " failed" << endl;) + return false; + } else { + if (!opndx->neighbs.empty()) { + RegMask neighbs = 0; + for (Indexes::iterator i = opndx->neighbs.begin(); i != opndx->neighbs.end(); ++i) { + Opndx* neigbx = graph.at(*i); + if (neigbx != 0 && neigbx->ridx == opndx->ridx) + neighbs |= neigbx->alloc; + } + + if ((neighbs & alloc) != 0 && neighbs != alloc) { + DBGOUT(" !alloc:" << std::hex << alloc << " * neighbs:" << neighbs << " =" << (alloc & neighbs) << std::dec << endl); + alloc &= neighbs; + } + } + + opndx->alloc = findHighest(alloc); + opndx->opnd->assignRegName(getRegName((OpndKind)registers[opndx->ridx].getKind(), + 
opndx->opnd->getSize(), + bitNumber(opndx->alloc))); + + ++count_assigned; + DBGOUT(" assigned " << *opndx->opnd << endl;) + return true; + } +} + + +/*** +void RegAlloc4::spillRegs () +{ + DBGOUT("spillRegs" << endl;) + + int inserted = 0; + + for (unsigned x = 0; x != graphsize; ++x) + { + Opndx* opndx = graph.at(x); + if (opndx != 0 && opndx->spill) + inserted += spillReg(opndx); + } + + DBGOUT("inserted " << inserted << " operands" << endl;) +} + + +int RegAlloc4::spillReg (Opndx* opndx) +{ + Opnd* opnd = opndx->opnd; + const Constraint initial = opnd->getConstraint(Opnd::ConstraintKind_Initial); + + if ((initial.getKind() & OpndKind_Memory) == 0) + { + DBGOUT(" spilling " << *opndx->opnd << " failed" << endl;) + return 0; + } + + DBGOUT(" spilling " << *opndx->opnd << endl;) + opnd->setCalculatedConstraint(initial); + opnd->assignMemLocation(MemOpndKind_StackAutoLayout, irManager->getRegOpnd(STACK_REG), 0); + + int inserted = 0; + + for (DefUses::iterator ptr = opndx->defuses.begin(), end = opndx->defuses.end(); ptr != end; ++ptr) + { + Opnd* opndnew = getIRManager().newOpnd(opnd->getType(), initial); + Inst* inst = ptr->inst; + Inst* instnew = 0; + bool replaced = false; + if (ptr->role & Inst::OpndRole_Use) + { + instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opndnew, opnd); + instnew->insertBefore(inst); + replaced = inst->replaceOpnd(opnd, opndnew); + assert(replaced); + DBGOUT(" before " << *inst << " inserted " << *instnew << " MOV " << *opndnew << ", " << *opnd << endl;) + } + + if (ptr->role & Inst::OpndRole_Def) + { + assert(!inst->hasKind(Inst::Kind_LocalControlTransferInst)); + if (!replaced) + replaced = inst->replaceOpnd(opnd, opndnew); + assert(replaced); + instnew = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opnd, opndnew); + instnew->insertAfter(inst); + DBGOUT(" after " << *inst << " inserted " << *instnew << " MOV " << *opnd << ", " << *opndnew << endl;) + } + + Constraint c = initial; + update(instnew, opndnew, c); + 
update(inst, opndnew, c); + opndnew->setCalculatedConstraint(c); + + ++inserted; + } + + ++count_spilled; + return inserted; +} +***/ + + +// If currently handled operand is referenced by current instruction, then evaluate +// constraint of the operand imposed by this instruction and return 'true'. +// Otherwise, do nothing and return false. +// +int RegAlloc4::update (const Inst* inst, const Opnd* opnd, Constraint& constr) const +{ + int count = 0; + Inst::Opnds opnds(inst, Inst::OpndRole_All); + for (Inst::Opnds::iterator it = opnds.begin(); it != opnds.end(); it = opnds.next(it)) + if ( inst->getOpnd(it) == opnd) + { + Constraint c = inst->getConstraint(it, 0, constr.getSize()); + if (constr.isNull()) + constr = c; + else + constr.intersectWith(c); + + count++; + } + return count; +} + + +Node* RegAlloc4::insertNode (Edge* edge) +{ + IRManager& irm = getIRManager(); + + BitSet tmp(mm, irm.getOpndCount()); + irm.getLiveAtExit(edge->getSourceNode(), tmp); + + Node* old_node = edge->getTargetNode(); + Node* new_node = irm.getFlowGraph()->spliceBlockOnEdge(edge, 0, true); + irm.getLiveAtEntry(new_node)->copyFrom(tmp); + DBGOUT("INSERT N#" << new_node->getId() << " from N#" << old_node->getId() << endl;) + return new_node; +} + + +Inst* RegAlloc4::insertCopy (bool atentry, Node* node, Opnd* opnd) +{ + assert(node->isBlockNode()); + Inst* mov = getIRManager().newCopyPseudoInst(Mnemonic_MOV, opnd, opnd); + Inst* inst = (Inst*)node->getLastInst(); + if (atentry || inst == 0) + { + node->prependInst(mov); + } + else + { + if (!inst->hasKind(Inst::Kind_LocalControlTransferInst)) + mov->insertAfter(inst); + else + mov->insertBefore(inst); + } + return mov; +} + + +//======================================================================================== +// Output formatters +//======================================================================================== + + +#ifdef _DEBUG_REGALLOC4 + +static ostream& operator << (ostream& os, Sep& x) +{ + if (x.first) + x.first 
= false; + else + os << ","; + return os; +} + +static ostream& operator << (ostream& os, const Inst& x) +{ + return os << "I#" << x.getId(); +} + + +static ostream& operator << (ostream& os, const Opnd& x) +{ + os << "O#" << x.getFirstId(); + RegName rn = x.getRegName(); + if (rn != RegName_Null) + os << "<" << getRegNameString(rn) << ">"; + if (x.isPlacedIn(OpndKind_Memory)) + os << ""; + return os; +} + + +static ostream& operator << (ostream& os, Constraint c) +{ + IRPrinter::printConstraint(os, c); + return os; +} + + +static ostream& operator << (ostream& os, const RegAlloc4::Registers& x) +{ + Sep s;; + os << "{"; + for (RegAlloc4::Registers::const_iterator it = x.begin(); it != x.end(); ++it) + os << s << *it; + return os << "}"; +} + + +static ostream& operator << (ostream& os, RegMasks x) +{ + return os << x.c; +} + + +static ostream& outRegMasks (ostream& os, RegAlloc4::RegMask* x, const RegAlloc4::Registers& registers) +{ + Sep s;; + os << "{"; + for (unsigned rk = 0; rk != registers.size(); ++rk) + { + RegAlloc4::RegMask msk = x[rk]; + + for (unsigned rx = 0; msk != 0; ++rx, msk >>= 1) + if ((msk & 1) != 0) + { + RegName reg = getRegName((OpndKind)registers[rk].getKind(), registers[rk].getSize(), rx); + os<< s << getRegNameString(reg); + } + } + return os << "}"; +} + + +static ostream& operator << (ostream& os, const RegAlloc4::DefUse& du) +{ + os << *du.inst; + if ((du.role & Inst::OpndRole_Def) != 0) + os << ":Def"; + if ((du.role & Inst::OpndRole_Use) != 0) + os << ":Use"; + if ((du.role & RegAlloc4::DefUse_Flags::Split) != 0) + os << ":Split"; + if ((du.role & RegAlloc4::DefUse_Flags::Fork) != 0) + os << ":Fork"; + if ((du.role & RegAlloc4::DefUse_Flags::Join) != 0) + os << ":Join"; + return os; +} + + +static ostream& operator << (ostream& os, const RegAlloc4::Opndx& opndx) +{ + os << "(" << opndx.idx << ") "; + + if (opndx.opnd != 0) + os << *opndx.opnd; + else + os << "REG"; + + os << " ridx:" << opndx.ridx + << " avail:" << hex << opndx.avail 
<< " alloc:" << opndx.alloc << dec + //<< " nbavails:" << opndx.nbavails + << " cost:" << opndx.cost; + + if (opndx.spill != 0) + os << " spilled"; + + os << " active:" << opndx.active() + << " sig:" << opndx.significance(); + + if (!opndx.defuses.empty()) + { + Sep s; + os << " defuse{"; + for (RegAlloc4::DefUses::const_iterator i = opndx.defuses.begin(); i != opndx.defuses.end(); ++i) + os << s << *i; + os << "}"; + } + + if (opndx.splits != 0) + { + Sep s; + os << " splits{"; + for (RegAlloc4::OpndxList::const_iterator i = opndx.splits->begin(); i != opndx.splits->end(); ++i) + os << s << (*i)->idx; + os << "}"; + } + + if (!opndx.adjacents.empty()) + { + Sep s; + os << " adjacents{"; + for (RegAlloc4::Indexes::const_iterator i = opndx.adjacents.begin(); i != opndx.adjacents.end(); ++i) + os << s << *i; + os << "}"; + } + + if (!opndx.hiddens.empty()) + { + Sep s; + os << " hiddens{"; + for (RegAlloc4::Indexes::const_iterator i = opndx.hiddens.begin(); i != opndx.hiddens.end(); ++i) + os << s << *i; + os << "}"; + } + + if (!opndx.hiddens.empty()) + { + Sep s; + os << " hiddens{"; + for (RegAlloc4::Indexes::const_iterator i = opndx.hiddens.begin(); i != opndx.hiddens.end(); ++i) + os << s << *i; + os << "}"; + } + + if (!opndx.neighbs.empty()) + { + Sep s; + os << " neighbs{"; + for (RegAlloc4::Indexes::const_iterator i = opndx.neighbs.begin(); i != opndx.neighbs.end(); ++i) + os << s << *i; + os << "}"; + } + + return os; +} + + +static ostream& operator << (ostream& os, const RegAlloc4::Graph& graph) +{ + for (unsigned x = 0; x != graph.size(); ++x) + if (const RegAlloc4::Opndx* opndx = graph[x]) + os << *opndx << endl; + + return os; +} + + +#endif //#ifdef _DEBUG_REGALLOC4 + +} //namespace Ia32 +} //namespace Jitrino