ethereumjs / ethereumjs-monorepo

Monorepo for the Ethereum VM TypeScript Implementation

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Gas cost disaccords between ethereumjs/evm and py-evm/geth

Alleysira opened this issue · comments

what happened

Hey, I'm running some contracts to compare ethereumjs/evm against other EVMs as part of security testing. I used evm.runCall to run solc-generated bytecode. I found that for one specific contract, the gas cost reported by ethereumjs/evm differs from geth and py-evm (whose results agree with each other).
I'd like to know whether this is a bug or whether I should change my script to test the JS EVM properly.
Thanks in advance.

Environment

  • OS: Ubuntu 20.04
  • solc 0.8.23
  • node-v20.9.0-linux-x64
  • @ethereumjs/evm@2.1.0
  • @ethereumjs/util@9.0.1
  • @ethereumjs/common@4.1.0
  • @ethereumjs/blockchain@7.0.1
  • @ethereumjs/statemanager@2.1.0
  • memory-level@1.0.0

Here is my contract in solidity and the corresponding bytecode: src and bin.zip

My test script

runcode.js is the script I used to run bytecodes on ethereumjs

const { Account,Address,hexToBytes,bytesToHex } =require('@ethereumjs/util')
const { EVM } =require('@ethereumjs/evm')
const { Chain, Common, Hardfork,ConsensusType,ConsensusAlgorithm  } =require('@ethereumjs/common')
const {Blockchain} = require('@ethereumjs/blockchain')
const { DefaultStateManager } =require('@ethereumjs/statemanager')
const { MemoryLevel } = require( 'memory-level')
const fs = require('fs');
const yargs = require('yargs')
// Both flags are mandatory strings: --code is the contract bytecode as bare hex
// (no 0x prefix), --sig is the calldata to send to the contract.
yargs.option('code', { type: 'string', demandOption: true })
     .option('sig', { type: 'string', demandOption: true });
const argv = yargs.argv;
// Default bytecode (the single byte 0x00), kept when --code carries no value.
let code = '0x00';
// `let`, not `const`: other parts of this script may reassign `sig`.
let sig = argv.sig;
// `code` receives its 0x prefix here; `sig` is not prefixed at this point.
// Strict comparison on purpose: with `!=` the string "1" coerces to equal
// `true`, so `--code 1` was silently replaced by the default bytecode.
if (argv.code !== true) {
    code = `0x${argv.code}`;
}

/**
 * Encodes a sequence of byte values as a lowercase hex string without prefix.
 *
 * @param {Uint8Array|number[]} uint8Array - Bytes to encode.
 * @returns {string} Two hex digits per byte, concatenated; '' for empty input.
 */
function uint8ArrayToHexString(uint8Array) {
    const pairs = [];
    for (const octet of Array.from(uint8Array)) {
        // Left-pad so that values below 0x10 still occupy two characters.
        pairs.push(octet.toString(16).padStart(2, '0'));
    }
    return pairs.join('');
}

/**
 * Runs the bytecode held in the module-level `code` inside a freshly created
 * EVM and prints one JSON trace line per executed opcode, followed by one JSON
 * summary line (`output`, `gasUsed`, `error`).
 *
 * When the module-level `sig` is set, it is sent as calldata from a fixed
 * caller address; otherwise the bytecode is executed directly without calldata.
 *
 * @returns {Promise<void>} Settles after the summary (or the failure) was logged.
 */
async function runEvmRunCall (){
    const common = Common.custom({
        chainId: 1234,
        networkId: 1234,
        defaultHardfork: Hardfork.Shanghai,
        consensus: {
          type: ConsensusType.ProofOfStake,
          algorithm: ConsensusAlgorithm.Casper,
        },
        genesis: {
          gasLimit: 10000000000,
          difficulty: 1,
          nonce: '0x0000000000000000',
          extraData: '0x0',
        },
        comment: 'Custom empty chain for benchmarks',
        bootstrapNodes: [],
        dnsNetworks: [],
      });
    const db = new MemoryLevel();
    // NOTE(review): `common` and `db` are passed positionally; confirm against
    // the Blockchain.create signature that both are actually picked up.
    const blockchain = await Blockchain.create(common,db);
    const stateManager = new DefaultStateManager();
    const evm = new EVM({ common,stateManager,blockchain });
    evm.DEBUG=true;
    const contractCode = hexToBytes(code);
    const contractAddress = Address.fromString('0x000000000000000000000000636F6E7472616374');
    await evm.stateManager.putContractCode(contractAddress, contractCode);

    // Emit one JSON object per executed opcode.
    evm.events.on('step', (data) => {
        const opTrace = {
            'pc': data.pc,
            'gas': '0x' + data.gasLeft.toString(16),
            'gasCost': '0x' + data.opcode.fee.toString(16),
            // Two hex digits per byte. Appending the raw byte values instead
            // yields unpadded decimal digits behind the "0x" prefix, which
            // cannot be compared with hex memory dumps from other tools.
            'memory': '0x' + uint8ArrayToHexString(data.memory),
            'memsize': data.memoryWordCount.toString(16),
            'stack': data.stack.map((item) => '0x' + item.toString(16)),
            'depth': data.depth,
            'opName': data.opcode.name
        };
        console.log(JSON.stringify(opTrace));
    });

    try {
        let callOpts;
        if (sig == null) {
            // No calldata: execute the bytecode directly.
            callOpts = {
                code: contractCode,
                gasLimit: BigInt('0x'+'ffff'),
                to: contractAddress
            };
        } else {
            const rawSig = sig.toString(16);
            // Prefix unless the string already starts with "0x". Testing the
            // two characters separately skipped the prefix for any calldata
            // whose first hex digit is 0 (e.g. "095ea7b3...").
            const callData = rawSig.startsWith('0x') ? rawSig : '0x' + rawSig;
            callOpts = {
                gasLimit: BigInt('0x'+'ffffff'),
                data: hexToBytes(callData),
                to: contractAddress,
                caller: new Address(hexToBytes("0x1c7cd2d37ffd63856a5bd56a9af1643f2bcf545f"))
            };
        }

        const results = await evm.runCall(callOpts);
        const ret = {
            'output': uint8ArrayToHexString(results.execResult.returnValue),
            'gasUsed': '0x' + results.execResult.executionGasUsed.toString(16),
            'error': results.execResult.exceptionError
        };
        console.log(JSON.stringify(ret));
    } catch (err) {
        // Report on stderr so the JSON lines on stdout stay clean, and signal
        // the failure to the calling process through the exit code.
        console.error(err);
        process.exitCode = 1;
    }
}

// Script entry point. The returned promise is handled explicitly so that a
// rejection is reported on stderr and reflected in the exit code.
runEvmRunCall().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

Then I use this python script to call runcode.js, the command is python3 poc.py --sig 0x22ea223100000000000000000000000042a39d51fc07bb9c181a0b62df834575cb3b1aa40000000000000000000000000000000000000000000000000000000054c1f8e0 --code poc/D223ICO.bin-runtime

import subprocess
import argparse

def parse_args():
    """
    Read the command-line options of the wrapper script.

    Returns the argparse namespace with `code` (path of the runtime bytecode
    file) and `signature` (function signature bytecode, i.e. hex calldata).
    """
    cli = argparse.ArgumentParser(description='Test a transaction')
    option_table = (
        ('--code', 'code', './poc/xxx.bin-runtime'),
        # function signature bytecode
        ('--sig', 'signature', '0x22ea223100000000000000000000000042a39d51fc07bb9c181a0b62df834575cb3b1aa40000000000000000000000000000000000000000000000000000000054c1f8e0'),
    )
    for flag, dest, fallback in option_table:
        cli.add_argument(flag, dest=dest, default=fallback, type=str)
    return cli.parse_args()

PROJECT_DIR = "/home/alleysira/project"
args = parse_args()
# Strip surrounding whitespace: a trailing newline in the bytecode file would
# otherwise end up inside the --code argument.
with open(args.code, "r") as codefile:
    bincode = codefile.read().strip()
sigName = args.signature
# Pass the arguments as a list and redirect stdout from Python instead of
# building a shell command line, so neither the file contents nor the --sig
# value are interpreted by a shell.
with open(PROJECT_DIR + "/poc/jsout.json", "w") as outfile:
    retcode = subprocess.call(
        ["node", PROJECT_DIR + "/poc/runcode.js", "--code", bincode, "--sig", sigName],
        stdout=outfile,
    )

The result will be added in the json file. In a similar way, I collected the results from geth and py-evm, then I found the gas cost of jsevm is 0x4a3a, which is different from 0x4076 of both geth and py-evm.
Here is the corresponding json file of 3 evms. gethout.json, jsout.json, pyout.json

Please enlighten me, thanks.

The trace differs 2500 gas and diverges after the CALL at your JSONs at line 930. The memory on Geth/py is not reported. Could you retry running these tests with:

  • Reporting the memory output
  • Ensuring the same gas limit among those tests?

I'm not sure but the memory reported on both implementations seem different (but I cannot directly check since the memory is not reported)

My hunch is (but not sure):

In the second CALL at line 931 you call into address zero. We charge "cold account access" for this (2600 gas). But Geth/Py seems to charge "warm slot access" (100 gas). Note that the difference here is 2500 gas (!). See https://eips.ethereum.org/EIPS/eip-2929

Your py/Geth implementation might have warmed the 0 address?

Are you also sure that Geth/Py run with EIP 2929 enabled? (So at least at the Berlin fork? - Ideally run both EVMs at the same hardfork so here: Shanghai)?

Are you also sure that Geth/Py run with EIP 2929 enabled? (So at least at the Berlin fork? - Ideally run both EVMs at the same hardfork so here: Shanghai)?

Yes, all 3 EVMs are at the same hardfork, Shanghai. I will set the same gas limit, enable the memory report as you asked, and run the tests again. Thanks for your quick response!

The trace differs 2500 gas and diverges after the CALL at your JSONs at line 930. The memory on Geth/py is not reported. Could you retry running these tests with:

  • Reporting the memory output
  • Ensuring the same gas limit among those tests?

I'm not sure but the memory reported on both implementations seem different (but I cannot directly check since the memory is not reported)

Thanks for your advice.
I have to add that I'm running standalone Geth evm 1.13.4-stable-3f907d6a and py-evm 0.8.0b1. As you suggested, I unified the gas limit to 0xffffff and recorded the memory. These are the updated json files: pyout.json, jsout.json, gethout.json. I then checked line 930 in jsout.json and line 932 in pyout.json, and I think your explanation is reasonable. As I'm mostly concerned about the security of EVM implementations, I would like to know whether this gas inconsistency is security-related or an intended difference, and whether it is necessary to inform the developers of Geth and PyEVM.
Thanks again for your patience.

Could you post the commands you use in order to run this on Geth/PyEVM?

Ok I think I am sure what causes this. It is this: https://eips.ethereum.org/EIPS/eip-3651

In our EVM we do not know what the coinbase of the block is, therefore it is not warm. However, I just checked PyEVM's code and they set coinbase to the zero address https://github.com/ethereum/py-evm/blob/b5c9fd9651505d1714b5f8a2466633541da81bb0/eth/_utils/headers.py#L63-L64. This is exactly the address you call into at line 931. Therefore, in PyEVM (and I also suspect Geth) this address is warm. However, in EthJS EVM it is cold. As mentioned before this would account for the 2500 gas difference.

In order to test if this is true, you can do two things:

  1. Test if the output is correct in case you use a pre-Shanghai hardfork (EIP 3651 got introduced in Shanghai). For instance, the Merge (Paris), or London, or Berlin (or even earlier)
  2. Or, in your contract, change the default ICO token address. (src.sol, line 622, change address public ICO_token = address(0) ; to something which is not address(0))

Side note, in our VM (which we use in our client) we warm the address

if (this.common.isActivatedEIP(3651) === true) {
this.evm.journal.addAlwaysWarmAddress(bytesToUnprefixedHex(block.header.coinbase.bytes))

Ok I think I am sure what causes this. It is this: https://eips.ethereum.org/EIPS/eip-3651

In our EVM we do not know what the coinbase of the block is, therefore it is not warm. However, I just checked PyEVM's code and they set coinbase to the zero address https://github.com/ethereum/py-evm/blob/b5c9fd9651505d1714b5f8a2466633541da81bb0/eth/_utils/headers.py#L63-L64. This is exactly the address you call into at line 931. Therefore, in PyEVM (and I also suspect Geth) this address is warm. However, in EthJS EVM it is cold. As mentioned before this would account for the 2500 gas difference.

In order to test if this is true, you can do two things:

  1. Test if the output is correct in case you use a pre-Shanghai hardfork (EIP 3651 got introduced in Shanghai). For instance, the Merge (Paris), or London, or Berlin (or even earlier)
  2. Or, in your contract, change the default ICO token address. (src.sol, line 622, change address public ICO_token = address(0) ; to something which is not address(0))

Yes! I tested in Paris version with solc --evm-version paris --bin-runtime, both the output and the gasused of 3 EVMs are the same. Actually as you can see in my script to run pyevm and Geth, the coinbase is indeed set to address 0.

Geth

I directly run the evm executable file of Geth 1.13.4-stable-3f907d6a.

./evm --debug --gas 0xffff --noreturndata=false --nomemory=false --json --code bincode --input --prestate genesis.json run > gethout.json

with genesis.json, where "coinbase" is "0x0000000000000000000000000000000000000000"

{
    "config": {
      "chainId": 9599,
      "homesteadBlock": 0,
      "eip150Block": 0,
      "eip155Block": 0,
      "eip158Block": 0,
      "byzantiumBlock": 0,
      "constantinopleBlock": 0,
      "petersburgBlock": 0,
      "istanbulBlock": 0,
      "berlinBlock": 0,
      "londonBlock": 0,
      "parisBlock": 0,
      "parisTime": 0,
      "shanghaiBlock": 0,
      "shanghaiTime": 0
    },
    "alloc": {
      "0x1c7cd2d37ffd63856a5bd56a9af1643f2bcf545f": {
        "balance": "111111111"
      }
    },
    "coinbase": "0x0000000000000000000000000000000000000000",
    "difficulty": "1",
    "extraData": "",
    "gasLimit": "0xffffff",
    "nonce": "0x0000000000000042",
    "mixhash": "0x0000000000000000000000000000000000000000000000000000000000000000",
    "parentHash": "0x0000000000000000000000000000000000000000000000000000000000000000",
    "timestamp": "0x00"
}

py-evm

And this is the Python script I used to run py-evm, in which 'coinbase' in GENESIS_PARAMS is set to constants.ZERO_ADDRESS.

from eth import constants
from eth.db.atomic import AtomicDB
from eth import constants
from eth.chains.base import MiningChain
from eth_utils import (to_wei, decode_hex,  to_canonical_address,)
from eth.vm.forks.shanghai import ShanghaiVM
from eth.vm.forks.paris import ParisVM
from eth_typing import Address
from eth_keys import keys
from eth.tools.transaction import new_transaction
from cytoolz import assoc
import argparse

def parse_args():
    """
    Read the command-line options of the py-evm driver.

    Returns the argparse namespace with `data` (contract runtime bytecode,
    e.g. produced by `solc xxx.sol --bin-runtime`) and `signature` (function
    signature bytecode). Both default to the empty string.
    """
    cli = argparse.ArgumentParser(description='Test a transaction')
    for flag, dest in (('--data', 'data'), ('--sig', 'signature')):
        cli.add_argument(flag, dest=dest, default='', type=str)
    return cli.parse_args()

def funded_address_initial_balance():
    """Return the initial balance of the funded account: 0xffff ether, converted with to_wei."""
    ether_amount = 0xffff
    return to_wei(ether_amount, 'ether')

def base_genesis_state(funded_address, funded_address_initial_balance):
    """
    Build a genesis state holding a single account.

    The account at `funded_address` gets the given balance, nonce 0, empty
    code and empty storage.
    """
    funded_account = dict(
        balance=funded_address_initial_balance,
        nonce=0,
        code=b'',
        storage={},
    )
    return {funded_address: funded_account}

def funded_address_private_key():
    """Return the fixed private key used to sign the test transaction."""
    raw_key = decode_hex('0x45a915e4d060149eb4365960e6a7a45f334393093061116b197e3240065ff2d8')
    return keys.PrivateKey(raw_key)

def genesis_state(base_genesis_state,simple_contract_address, bytecode):
    """
    Add the contract account to a genesis state.

    `base_genesis_state` is a dict keyed by address. The entry stored under
    `simple_contract_address` has zero balance, zero nonce, empty storage and
    the decoded `bytecode` (a hex string) as its code. The combined mapping is
    produced with `assoc`.
    """
    contract_account = {
        'balance': 0,
        'nonce': 0,
        'code': decode_hex(bytecode),  # contract bytecode
        'storage': {},
    }
    return assoc(base_genesis_state, simple_contract_address, contract_account)

# Header fields handed to `from_genesis` when the test chain is created.
# The coinbase is the zero address.
GENESIS_PARAMS = dict(
    coinbase=constants.ZERO_ADDRESS,
    transaction_root=constants.BLANK_ROOT_HASH,
    receipt_root=constants.BLANK_ROOT_HASH,
    difficulty=0,
    gas_limit=constants.GENESIS_GAS_LIMIT,
    timestamp=0,
    extra_data=constants.GENESIS_EXTRA_DATA,
    nonce=b'\x00' * 8,
)

def main():
    """
    Build a single-fork (Shanghai) test chain whose genesis state contains the
    contract given by --data, then execute one transaction carrying the --sig
    calldata against that contract.
    """
    cli_args = parse_args()

    # Account that receives the initial balance in the genesis state.
    funded_account = to_canonical_address("8888f1f195afa192cfee860698584c030f4c9db1")
    # Address under which the contract bytecode is installed.
    contract_address = to_canonical_address("0x692a70d2e424a56d2c6c27aa97d1a86395877b3a")
    sender = to_canonical_address("0xa94f5374fce5edbc8e2a8697c15331677e6ebf0b")
    sender_key = funded_address_private_key()

    initial_state = genesis_state(
        base_genesis_state(funded_account, funded_address_initial_balance()),
        contract_address,
        cli_args.data,
    )

    chain_class = MiningChain.configure(
        __name__='MyTestChain',
        vm_configuration=((constants.GENESIS_BLOCK_NUMBER, ShanghaiVM),),
    )
    chain = chain_class.from_genesis(AtomicDB(), GENESIS_PARAMS, initial_state)

    call_txn = new_transaction(
        chain.get_vm(),
        sender,
        contract_address,
        private_key=sender_key,
        gas=0xffffff,
        data=decode_hex(cli_args.signature),
    )
    # NOTE(review): the result is neither printed nor returned here; confirm
    # that the trace output is collected through some other channel.
    result_bytes = chain.get_transaction_result(call_txn, chain.get_canonical_head())

# Run the reproduction only when this file is executed as a script.
if __name__ == '__main__':
    main()

I think this explanation is reasonable, and I will take some time to catch up on the EIPs. As I'm a Ph.D. candidate in security and the "Security Considerations" section of EIP-3651 is empty, I'm still curious whether the difference in warm-address handling between the EthJS EVM and Geth/py-evm is a bug or an intentional feature.
By the way, It's very nice of you to help me understand the EVM, and I sincerely appreciate it.

I think the confusion might come from the fact that we have a lightweight EVM package and a full-fledged VM package. The EVM, for instance, does not know what a "block" is and does not have this dependency. To run the full Ethereum tests, one has to use the VM and not the EVM, because many tests would fail with the EVM alone (this is what we do). Also, our client uses the VM package.

I will close this issue, happy to help, please let us know if you need some more help 😄 (I will also convert this to a discussion)