adaltas / node-hbase

Asynchronous HBase client for NodeJs using REST

Home Page:https://hbase.js.org

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

there are many connections to hbase

simonchan2013 opened this issue · comments

Hello, I wrote a module to perform all HBase operations: it creates one client and then performs every operation with that client. So I expected there to be just one connection to HBase, but I found that there are 4000 connections to HBase, and most of them are in TIME_WAIT status. Is something wrong? Here is my code:

// Application configuration. GMAPI is not declared in this snippet —
// presumably a global set up elsewhere; confirm before reusing this module.
var config = GMAPI.config;
// Shadows the built-in Promise with bluebird; the exported *Async helpers
// further down call Promise.promisify.
var Promise = require('bluebird');
var hbase = require('hbase');

// One client object shared by every get/put in this module, built from the
// configured HBase host and port.
var client = hbase({host: config.HBASE.hostname, port: config.HBASE.port});



// =====================================================================================================================
/**
 * Read one column of one row through the shared client and pass the first
 * returned cell to `callback` as plain strings.
 *
 * @param {String} table - name of the table to read from
 * @param {String} rowKey - key of the row to read
 * @param {String} column - column to fetch
 * @param {Function} callback - node-style `(err, res)`; on success `res` is
 *   `{column, timestamp, value}` taken from the first cell of the response.
 *   A 404 from the driver, or a response without any cell, is reported as
 *   `Error('the mail does not exist!')`; any other error is passed through.
 */
function get(table, rowKey, column, callback) {
    // {v: 1} is forwarded to the driver as-is (presumably "one version" — confirm
    // against the driver documentation).
    client.table(table).row(rowKey).get(column, {v: 1}, function(err, data) {
        if (err) {
            // Number() keeps a string code such as '404' matching, as the
            // previous loose comparison did, while using strict equality.
            if (Number(err.code) === 404) callback(new Error('the mail does not exist!'));
            else callback(err);
            return;
        }

        // No error but no cell either: report it through the callback rather
        // than throwing a TypeError from inside the driver's callback.
        var cell = data && data[0];
        if (!cell) {
            callback(new Error('the mail does not exist!'));
            return;
        }

        callback(null, {
            column: cell.column.toString('utf-8'),
            timestamp: cell.timestamp,
            value: cell.$.toString('utf-8')
        });
    });
}

/**
 * Write `rowData` into `table` through the shared client.
 *
 * @param {String} table - name of the table to write to
 * @param {Array} rowData - cells to store, handed to the driver unchanged
 * @param {Function} callback - handed to the driver's put() unchanged
 */
function put(table, rowData, callback) {
    var targetTable = client.table(table);
    // row() is deliberately called without a key; presumably each entry of
    // rowData carries its own row key — confirm against the driver docs.
    var rowHandle = targetTable.row();
    rowHandle.put(rowData, callback);
}


// =====================================================================================================================
module.exports = {
    /**
   * get email content from hbase with callback function
   * @function getMailContent
   * @param {String} rowKey
   * @param {Function} callback
   */
    getMailContent: function(rowKey, callback) {
        get(config.HBASE.mailTable, rowKey, config.HBASE.mailContentCol, callback);
    },

    /**
   * update email status and effect data to hbase
   * @function updateStatusData
   * @param {Array} rowData
   * @param {Function} callback
   */
    updateStatusData: function(rowData, callback) {
        put(config.HBASE.mailStatusTable, rowData, callback);
    },

    /**
   * get email content from hbase, return a Promise
   * @function getMailContentAsync
   * @param {String} rowKey
     * @return Promise
   */
    getMailContentAsync: function(rowKey) {
        return Promise.promisify(this.getMailContent)(rowKey);
    },

    /**
   * update email status and effect data to hbase, return a Promise
   * @function updateStatusDataAsync
   * @param {Array} rowData
   * @return Promise
   */
    updateStatusDataAsync: function(rowData) {
        return Promise.promisify(this.updateStatusData)(rowData);
    },

    /**
   * put email data to hbase
   * @function putMail
   * @param {Array} rowData
   * @param {Function} callback
   */
    putMail: function(rowData, callback) {
        put(config.HBASE.mailTable, rowData, callback);
    },

    /**
   * put email data to hbase, return a Promise
   * @function putMailAsync
   * @param {Array} rowData
   * @param {Function} callback
   */
    putMailAsync: function(rowData) {
        return Promise.promisify(this.putMail)(rowData);
    },

    /**
   * get email lp content from hbase, return a Promise
   * @function getMailContentAsync
   * @param {String} rowKey
     * @return Promise
   */
    getLpContentAsync: function(rowKey, column) {
        return Promise.promisify(get)(config.HBASE.mailTable, rowKey, column);
    }
}

This driver relies on the HBase REST server, so even if you initialize one client, you'll still get multiple HTTP client requests. I don't think it could be any different. Do you have a load that justifies 4000 HTTP requests? Do you suspect that the connections are not being properly closed?

Hi, I get data from HBase and put data to HBase very frequently, so maybe it creates one HTTP connection every time I get data, and I don't close this connection. So the server keeps many HTTP connections in TIME_WAIT. Should I close the client every time I get or put data? And how can I do this? Thanks a lot.

As far as I can see, this driver handles the connection correctly and closes it accordingly. You don't need to close the connection. See the Node.js documentation as well as the HBase connection documentation. This seems strange to me. Maybe the answer is on the HBase side.

I added two log statements, before line 78 and line 105:

// Excerpt of the driver's request routine with two debug console.log calls
// added. `options`, `callback`, `data` and `http` are not defined in this
// excerpt; they come from the enclosing scope, which is not shown here.
do_request = (function(_this) {
    return function() {
      var req;
      // `http` is indexed by the client's configured protocol — presumably a
      // map from protocol name to the matching Node module; confirm in the driver.
      req = http[_this.client.options.protocol].request(options, function(res) {
        var body;
        body = '';
        // Accumulate the response body chunk by chunk.
        res.on('data', function(chunk) {
          console.log('-----------------------here is get data');
          return body += chunk;
        });
        // Response fully received: decode the body via handleJson; a decoding
        // error is captured and passed to the callback instead of being thrown.
        res.on('end', function() {
          var e, error;
          error = null;
          try {
            body = _this.handleJson(res, body);
          } catch (_error) {
            e = _error;
            body = null;
            error = e;
          }
          return callback(error, body, res);
        });
        // NOTE(review): nothing visible here prevents `callback` from being
        // invoked by both the 'end' and the 'close' handlers — confirm.
        return res.on('close', function() {
          var e;
          e = new Error('Connection closed');
          return callback(e, null);
        });
      });
      // Request-level failures are forwarded to the callback.
      req.on('error', function(err) {
        return callback(err);
      });
      // Send a request body only when there is one; non-string data is
      // serialized to JSON first.
      if (data && data !== '') {
        data = typeof data === 'string' ? data : JSON.stringify(data);
        req.write(data, 'utf8');
      }
      console.log('-----------------------here is end request');
      // end() marks the outgoing request as complete; the response handlers
      // registered above run afterwards.
      return req.end();
    };
  })(this);

Then I tried to get data, and it printed the following log:
-----------------------here is end request
-----------------------here is get data
-----------------------here is get data
-----------------------here is get data
-----------------------here is get data
-----------------------here is get data

So I think that in this request, req.end() is called before all the data has arrived. According to the answer to the question "Node.js response from http request not calling 'end' event without including 'data' event", the end event won't fire until all the data has been consumed; but since you return the data, req.end() may never be called again. Is this what causes the problem?

If I understood correctly, you're suggesting calling req.end inside req.on('error', ...) and req.on('end', ...)?

Yes, or use a promise to call req.end() in the next step? But I'm not sure whether it would work; I am not familiar with HTTP.

I don't have much time. Could you try moving the req.end() line into those 2 locations and let me know if this is better?

I tried it, but it does not seem to work. It didn't send the end of the request to the server, so there are still many TIME_WAIT connections.

It's not a bug. It's just a mechanism of HTTP. When a client closes a connection, the server will keep the connection in TIME_WAIT for 2MSL (twice the maximum segment lifetime). Maybe a keep-alive agent would be helpful; I am still checking the documentation.

Your feedback will be welcome.

Old issue, closing. Feel free to submit a new issue ideally with a pull request.