pytorch / glow

Compiler for Neural Network hardware accelerators

Softmax Node has useless input operand (selected)?

et-nivard opened this issue

The SoftMax node has a second input named "selected" (https://github.com/pytorch/glow/blob/master/tools/ClassGen/NodeGen.cpp#L374) which doesn't seem to be used at all. The SoftMax instruction doesn't have this extra input (https://github.com/pytorch/glow/blob/master/tools/ClassGen/InstrGen.cpp#L300), and there is no specific lowering between them. It only seems to be used in the gradient version.
Could we remove this apparently useless input and clean up the corresponding OperatorTest? Or maybe I'm missing something?
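
For context, the two ClassGen definitions look roughly like this (paraphrased from the linked lines, so the exact chained calls and docstring may differ):

// NodeGen.cpp (sketch): the SoftMax node declares both Input and Selected,
// and also requests an autogenerated gradient node.
BB.newNode("SoftMax")
    .addInput("Input")
    .addInput("Selected")
    .addResultFromCtorArg()
    .addGradient()
    .setDocstring("Performs SoftMax normalization on the Input tensor.");

// InstrGen.cpp (sketch): the SoftMax instruction only has Dest and Src;
// there is no Selected operand.
BB.newInstr("SoftMax")
    .addOperand("Dest", OperandKind::Out)
    .addOperand("Src", OperandKind::In)
    .autoVerify(VerifyKind::SameShape, {"Dest", "Src"})
    .autoIRGen();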

I believe this is used when doing training; e.g., see the IRGen for SoftMaxGrad:

glow/lib/IR/IRGen.cpp, lines 222 to 239 in d546718:

  case glow::Kinded::Kind::SoftMaxGradNodeKind: {
    auto *SMG = cast<SoftMaxGradNode>(N);
    // Original inputs:
    auto *origIn = valueForNode(SMG->getInput());
    auto *origSelect = valueForNode(SMG->getSelected());
    // Values related to the output of the node.
    auto *outGrad = valueForNode(SMG->getGradOfOriginalOutputNamedResult());
    auto originalNodeResult = SMG->getOriginalOutputForResult();
    assert(nodeToInstr_.count(originalNodeResult.getNode()) &&
           "Unknown original node");
    auto *origOut = valueForNode(originalNodeResult);
    auto *srcGrad = builder_.createAllocActivationInst(
        DECORATE_NODE_NAME(N, "res"), outGrad->getType());
    auto *SMGI = builder_.createSoftMaxGradInst(N->getName(), origOut, origIn,
                                                origSelect, srcGrad);
    registerIR(SMG->getGradOfInputNamedInput(), SMGI->getSrcGrad());
    break;
  }

Yes, I know. But why does the SoftMax node also contain this input operand? Is it required due to some relationship or flow between SoftMax and SoftMaxGrad? If not, why not remove it? It seems like redundant code to me.
Sorry, I'm just trying to understand it better.

Yes, after differentiation it is also passed as an input into SoftMaxGradNode, and is used there. See the screenshot below of an example dot graph, from MLTest's classifyPlayerSport:

[screenshot: example dot graph from classifyPlayerSport showing the generated SoftMaxGradNode and its Selected input]
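
If it helps to reproduce that picture, here is a minimal sketch (the glow::differentiate() call and the header paths are from memory, so treat them as assumptions):

#include "glow/Base/Train.h"   // TrainingConfig (assumed path)
#include "glow/Graph/Grad.h"   // glow::differentiate (assumed path)
#include "glow/Graph/Graph.h"

using namespace glow;

// Sketch: differentiate a forward function whose classifier ends in
// SoftMax(input, selected), then dump the training graph. The generated
// SoftMaxGradNode shows up with an edge from the same "selected" placeholder
// that feeds the forward SoftMaxNode.
static void dumpTrainingGraph(Function *F) {
  TrainingConfig TC;                          // defaults are fine for inspection
  Function *TF = glow::differentiate(F, TC);  // autogenerates the *Grad nodes
  TF->dumpDAG("softmax-grad.dot");            // writes a dot like the screenshot
}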

We include it in the SoftMaxNode so that when we autogenerate the SoftMaxGradNode it's there for use by the SoftMaxGrad kernel.
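
The Selected edge is just carried over when the grad node is generated, so anything that handles SoftMaxGradNode can query it. A small sketch (the accessor names are the ones already visible in the IRGen snippet above):

#include "glow/Graph/Graph.h"
#include "glow/Graph/Nodes.h"
#include "llvm/Support/Casting.h"

// Sketch: every generated SoftMaxGradNode exposes the Selected value that was
// attached to the forward SoftMaxNode; this is what the backend kernels consume.
static void printSelectedLabels(glow::Function *F) {
  for (glow::Node &N : F->getNodes()) {
    if (auto *SMG = llvm::dyn_cast<glow::SoftMaxGradNode>(&N)) {
      SMG->getSelected().getNode()->dump();  // the labels node/placeholder
    }
  }
}

The interpreter kernel then reads it directly: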

void BoundInterpreterFunction::fwdSoftMaxGradInst(const SoftMaxGradInst *I) {
  auto inG = getWeightHandle(I->getSrcGrad());
  auto idim = inG.dims();
  auto outW = getWeightHandle(I->getOrigDest());
  auto selectedH = getWeightHandle<int64_t>(I->getSelected());
  inG.clear();
  // http://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/
  // https://stats.stackexchange.com/questions/79454/softmax-layer-in-a-neural-network
  for (dim_t n = 0; n < idim[0]; n++) {
    for (dim_t i = 0; i < idim[1]; i++) {
      float delta = (selectedH.at({n, 0}) == (int64_t)i);
      inG.at({n, i}) = outW.at({n, i}) - delta;
    }
  }
}
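
For completeness, the reason the labels are needed right here: the kernel computes the fused gradient of cross-entropy loss through softmax, i.e. for sample n

dL/dx[n][i] = softmax(x[n])[i] - (i == selected[n] ? 1 : 0)

so delta above is exactly that one-hot indicator built from Selected. The forward SoftMax instruction never looks at the labels, which is why only the gradient path uses the operand.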

OK, got it. Thanks.