johntitus / node-horseman

Run PhantomJS from Node

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Unhandled Rejection Error - Can't seem to catch this

NoelDavies opened this issue · comments

commented

I'm crawling a group of X pages on a site my employer has built, each page is a /show/{x} essentially. On some pages I get the below error (or a similar error), and some I don't.

Unhandled rejection Error at ClientRequest.<anonymous> (/path/to/global/node_modules_folder/node_modules/node-phantom-simple/node-phantom-simple.js:659:12) at emitOne (events.js:115:13) at ClientRequest.emit (events.js:210:7) at Socket.socketOnEnd (_http_client.js:435:9) at emitNone (events.js:110:20) at Socket.emit (events.js:207:7) at endReadableNT (_stream_readable.js:1045:12) at _combinedTickCallback (internal/process/next_tick.js:102:11) at process._tickCallback (internal/process/next_tick.js:161:9)

By using the following Code:
Note: You can see I've tried a number of thing to catch the errors or prevent them from occuring but I've had no luck, my guess is that it's running out of memory of something?

var Horseman = require('node-horseman');
const readline = require('readline');
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout
});

let issuesThatCauseACrash = [15739, 15724];


var baseUrl = 'https://test.x.domain.com';

rl.question('What base URL would you like use for this X page visitor? (' + baseUrl + ')', (answer) => {
  if (answer.length !== 0) {
  	baseUrl = answer;
  }

 try{
	rl.close();
	hg.setup();
	hg.login();
  } catch (err) {
	console.log('Error');
	hg.horseman.close();
	hg.visitCase(caseId);
  }
});


var hg = {
	horseman: {},

	setup: function () {
		hg.horseman = new Horseman({
			timeout: 500000,
			cookiesFile: './cookies.txt',
			loadImages: false,
			diskCache: false,
			diskCachePath: './cache/'
		})
		;
	},

	login: function () {
		hg.horseman
		  .log('Opening login page')
		  .open(baseUrl + '/login')
		  .type('input#username', 'usernameHere')
		  .type('input#password', 'passwordHere')
		  .click('#_submit')
		  .log('Logging in...')
		  .waitForNextPage()
		  .open(baseUrl + '/')
		  .waitForSelector('#table-of-new-items')
		  .log('opened')
		  .html('#table-new-cases tbody tr td:first-child() a')
		  .then((attribute) => {
		  	this.visitCase(attribute);
		  })
	  	;
	},

	visitCase: function(caseId) {
		caseId = '' + caseId + '';
		console.log(caseId);
		var rawCaseId = '' + caseId.replace(/^BS/, '').replace(/^0+/, '');

		hg.horseman
			.log('Opening case ' + rawCaseId)
			.open(baseUrl + '/cases/' + rawCaseId)
			
		        // .on('error', function (msg, trace) {
		        //     console.log('error: ', msg, trace);
		        // })
                        // .catch(function(e) {
                        //     console.log("horseman error: "+e);
                        // })
			// .log('Page opened, waiting for selector')
			.waitForSelector('#horse-accordion')
			// .log('found accordian')
			// .on('resourceReceived', (response) => {
			// 	if (new RegExp(/newFrontEndPremium/).test(response.url)) {
			// 		nextCaseId = rawCaseId - 1;

			// 		if (nextCaseId > 0) {
			// 			hg.visitCase(nextCaseId);
			// 		}
			// 	}
			// })
			.wait(2000)
			.close()
			.then(() => {
				nextCaseId = rawCaseId - 1;

				if (nextCaseId > 0) {
					hg.visitCase(nextCaseId);
				}
			})
			// .status()
			// .then((statusCode) => {
			// 	nextCaseId = rawCaseId - 1;

			// 	if (nextCaseId > 0) {
			// 		hg.visitCase(nextCaseId);
			// 	}
			// })
		;
	
	}
};

it always dies on the .open().