ariya / phantomjs

Scriptable Headless Browser

Home Page:http://phantomjs.org

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

memory leak with WebPage class, multiple page loads

jasonswearingen opened this issue · comments

There is a memory leak when reusing a phantomjs instance, averaging about 1.4mb per page load in debian 7. (for example, loading 200 pages results in 280mb memory use). I have reprod this on Debian7 x64 and Windows8 x64

I have included a phantomjs script that can be used to reproduce the memory leak. I am following what seems to be "best practice" by invoking page.close() after the load is done, but that does not resolve the issue.

fyi, in addition to my repro script, i have tried various combinations of tests such as

  • close after page.onLoadFinished() has completed (not during)
  • reusing the same page object multiple times
  • not calling page.stop()

none of those had any positive impact on the memory leak problem.

here's my repro script, paste it into something like memoryleak.js and run from the command prompt.
NSFW: The test domains are the top 100 domains by traffic, and that includes porn sites, so if you have some corporate IT network traffic monitoring going on, remove the naughty sites from the testUrls array.

/// memoryleak.js
/**
 * Substitutes positional placeholders ("{0}", "{1}", ...) in a template string.
 *
 * @param {string} toFormat Template containing "{n}" placeholders.
 * @param {...*} values Replacement values; "{n}" is replaced by the n-th
 *     argument after the template.
 * @returns {string} The template with every placeholder that has a matching
 *     argument replaced; placeholders without one are left untouched.
 */
function format(toFormat) {
    // Kept ES5-compatible (no rest parameters) because this script targets PhantomJS.
    var args = Array.prototype.slice.call(arguments, 1);
    return (toFormat).replace(/\{(\d+)\}/g, function (match, number) {
        // String() instead of .toString(): a null argument is rendered as
        // "null" rather than throwing a TypeError in the middle of a log line.
        return typeof args[number] !== "undefined" ? String(args[number]) : match;
    });
}
;
var system = require("system");
/**
 * Global PhantomJS error handler: logs the error and terminates with exit code -1.
 *
 * @param {string} msg Error message.
 * @param {Array} trace Stack frames; each frame is expected to carry
 *     `file` (or `sourceURL`), `line` and optionally `function`.
 */
phantom.onError = function (msg, trace) {
    try  {
        // `trace` is an array of frame objects, so concatenating it directly
        // would print "[object Object]"; render each frame explicitly instead.
        var frames = [];
        if (trace && trace.length) {
            for (var i = 0; i < trace.length; i++) {
                var frame = trace[i];
                frames.push((frame.file || frame.sourceURL) + ":" + frame.line + (frame["function"] ? " (in function " + frame["function"] + ")" : ""));
            }
        }
        console.log("phantom encountered an error.  exiting... " + "msg=\"" + msg + "\"" + " trace=\"" + frames.join(" <- ") + "\"/>");
    }finally {
        // Exit even if building or printing the log line itself throws.
        phantom.exit(-1);
    }
};
/**
 * Bare host names to load; the polling function prefixes each with "http://www.".
 *
 * NSFW: these are the top 100 domains by traffic, which includes porn sites.
 * If your network traffic is monitored (e.g. corporate IT), remove those
 * entries from this array before running the script.
 */
var testUrls = [
    "google.com", 
    "facebook.com", 
    "youtube.com", 
    "yahoo.com", 
    "amazon.com", 
    "bing.com", 
    "ebay.com", 
    "wikipedia.org", 
    "craigslist.org", 
    "linkedin.com", 
    "live.com", 
    "twitter.com", 
    "blogspot.com", 
    "aol.com", 
    "go.com", 
    "pinterest.com", 
    "msn.com", 
    "tumblr.com", 
    "cnn.com", 
    "ask.com", 
    "huffingtonpost.com", 
    "netflix.com", 
    "paypal.com", 
    "weather.com", 
    "conduit.com", 
    "espn.go.com", 
    "instagram.com", 
    "wordpress.com", 
    "bankofamerica.com", 
    "akamihd.net", 
    "imdb.com", 
    "chase.com", 
    "microsoft.com", 
    "about.com", 
    "avg.com", 
    "pornhub.com", 
    "comcast.net", 
    "foxnews.com", 
    "apple.com", 
    "walmart.com", 
    "xhamster.com", 
    "mywebsearch.com", 
    "wellsfargo.com", 
    "xvideos.com", 
    "yelp.com", 
    "imgur.com", 
    "nytimes.com", 
    "nbcnews.com", 
    "cnet.com", 
    "reddit.com", 
    "adobe.com", 
    "ehow.com", 
    "pandora.com", 
    "pch.com", 
    "hulu.com", 
    "zedo.com", 
    "etsy.com", 
    "flickr.com", 
    "outbrain.com", 
    "optmd.com", 
    "indeed.com", 
    "livejasmin.com", 
    "zillow.com", 
    "target.com", 
    "xnxx.com", 
    "homedepot.com", 
    "redtube.com", 
    "answers.com", 
    "thepiratebay.sx", 
    "att.com", 
    "shopathome.com", 
    "wikia.com", 
    "dailymail.co.uk", 
    "usps.com", 
    "babylon.com", 
    "ups.com", 
    "bestbuy.com", 
    "youporn.com", 
    "reference.com", 
    "godaddy.com", 
    "groupon.com", 
    "deviantart.com", 
    "usatoday.com", 
    "pof.com", 
    "capitalone.com", 
    "bbc.co.uk", 
    "washingtonpost.com", 
    "match.com", 
    "drudgereport.com", 
    "mlb.com", 
    "tripadvisor.com", 
    "pogo.com", 
    "verizonwireless.com", 
    "blogger.com", 
    "buzzfeed.com", 
    "doublepimp.com", 
    "inksr.com", 
    "delta-search.com", 
    "fedex.com", 
    "inksdata.com", 
    "oyodomo.com", 
    "aweber.com", 
    "abcnews.go.com", 
    "vimeo.com", 
    "hootsuite.com", 
    "bleacherreport.com", 
    "lowes.com", 
    "yellowpages.com", 
    "americanexpress.com", 
    "tube8.com", 
    "yieldmanager.com", 
    "salesforce.com"
];
// Leftover flag: the code that set it to true is commented out, so it is only
// ever reset to false by disposePage().
var readyToDispose = false;
// Countdown of page loads still to start; also used (modulo the list length)
// to pick which entry of testUrls is loaded next.
var _testsRemaining = 1000;
// The WebPage currently being loaded, or null/undefined when none is open.
var openRequest;
// The page observed by the previous poll and the time it was first observed;
// together they drive the 10-second load timeout in _tryNextText().
var lastSeen;
var lastSeenStartTime;
/**
 * Closes the page currently held in `openRequest` and resets the
 * bookkeeping so that the polling loop can start the next page.
 */
function disposePage() {
    var finishedPage = openRequest;
    finishedPage.close();
    readyToDispose = false;
    openRequest = null;
}
/**
 * One polling step of the repro loop.
 *
 * While a page is still open it only performs timeout bookkeeping; once no
 * page is open it creates a new WebPage and starts loading the next URL.
 *
 * @returns {boolean} true if a new page load was started, false otherwise.
 */
function _tryNextText() {
    if(openRequest != null) {
        //if (readyToDispose) {
        //  disposePage();
        //  return false;
        //}
        if(lastSeen == openRequest) {
            // This page was already open at an earlier poll: measure how long
            // it has been since it was first observed.
            var elapsed = Date.now() - lastSeenStartTime;
            if(elapsed > 10000) {
                console.log(format("PAGE LOAD TIMEOUT! aborting url={0},  pageLen={1}", openRequest.url, openRequest.content.length));
                //timed out, next loop will reacquire
                // NOTE(review): only stop() is called here; the cleanup below
                // is commented out, so nothing in this branch clears
                // openRequest or lastSeen. Moving on depends on
                // onLoadFinished firing and calling disposePage().
                openRequest.stop();
                //openRequest.close();
                //openRequest = null;
                //lastSeen = null;
                //lastSeenStartTime = null;
                            } else {
                //not yet timed out
                return false;
            }
        } else {
            //set our last seen then wait for next loop
            lastSeen = openRequest;
            lastSeenStartTime = Date.now();
        }
        return false;
    }
    // No page is open: pick the next URL, cycling through the list.
    var index = _testsRemaining % testUrls.length;
    var targetUrl = "http://www." + testUrls[index];
    // The counter is decremented before the check below, so the poll that
    // brings it to 0 stops the loop without starting a page.
    _testsRemaining--;
    if(_testsRemaining <= 0) {
        console.log("TESTS COMPLETE!  check your memory usage");
        // Only the interval is cleared; phantom.exit() is not called in this branch.
        clearInterval(loopHandle);
        return false;
    }
    console.log(format("[{0}] = {1} starting...", _testsRemaining, testUrls[index]));
    var thisPage = require("webpage").create();
    openRequest = thisPage;
    openRequest.onLoadFinished = function (status) {
        // thisPage is captured by the closure so that a callback from a page
        // that is no longer the current one can be told apart.
        if(openRequest == thisPage) {
            console.log(format("got page, url ={0}, targetUrl={1}  pageLen={2}", openRequest.url, targetUrl, openRequest.content.length));
            //readyToDispose = true;
            openRequest.stop();
            disposePage();
        } else if(openRequest == null) {
            console.log("ERROR? page opened while openRequest==null,  pageurl=" + targetUrl);
            //phantom.exit(-1);
                    } else {
            console.log(format("ERROR!  WRONG PAGE OPEN! got page, openPageurl ={0}, targetUrl={1}  pageLen={2}", openRequest.url, targetUrl, openRequest.content.length));
            phantom.exit(-1);
        }
    };
    openRequest.open(targetUrl);
    return true;
}
// Truncate the URL list when fewer tests than URLs were requested; otherwise
// the list keeps its full length.
testUrls.length = _testsRemaining > testUrls.length ? testUrls.length : _testsRemaining;
console.log("running tests, count = " + _testsRemaining);
console.log("setting interval ");
// Poll every 500 ms; _tryNextText() clears this interval via loopHandle once
// the test counter reaches zero.
var loopHandle = setInterval(function () {
    _tryNextText();
}, 500);

PS: run that script for long enough (usually around 400 page loads) and the phantomjs exe crashes, sometimes silently, sometimes with segfaults

PPS: i'm not out of ram when phantom crashes, and it can crash anywhere from 50 page loads on up to never, just averaging somewhere about 400.

I faced some problem on Windows. No resolution yet.

I met the same problem

Does anyone know how to solve.

No workaround yet...

same issue for me, any response @ariya ?

reproduced it on all versions >= 1.7.0

since I needed a solution to this problem for my product, I decided to make a webservice to offload screen-scrape requests:

http://PhantomJsCloud.com

It's totally free right now (I just launched it) and will later cost $0.0002/page if you exceed the free tier.

right now the features focus on screen-scraping. if you use my webservice and want some other phantomjs feature exposed, lemmi know.

a mitigation is to do what i do for my webservice: exec a max of 50 or so page loads per phantomjs process, then restart the process. it works out pretty good and you don't have too much of an initialization overhead.

I'm having this problem too. No workarounds so far.

fyi, i just added a disclaimer to my repro source about some of the test domains being NSFW.

Yes, we have the same problem, version 1.9.2
In our production environment, phantomjs eats 21G + within 4 days uptime.

the only possible workaround is to restart phantomjs process each time you load page

2 process 30mbytes each instead of 1 process and 200+mbytes (after 5-10 loads)

@veveve thanks for your report, just added my comment there.

@cursedcoder thanks for comment! I've answered you there (https://bugreports.qt-project.org/browse/QTBUG-36530).

Allan replaced report by another one (https://bugreports.qt-project.org/browse/QTBUG-34494),
where reported that problem exists only if auto load of images off. Could you try to test your code with load-images option is enabled?

@veveve it leaks a lot in both: load images on, load images off.

Any updates with this memory leak issue?

Look here: http://stackoverflow.com/a/21642607/1113207

  1. Try to use phantomjs with option "--load-images=true".
  2. Try not to use phantomjs for sites with video elements.

If this won't help, you can post your example here, I'll try to test if it's another QT or phantomjs bug.

@veveve using script in the first post and --load-images=true I have 250mbytes after visiting 60 sites.

Any news on 1.9.7? On 2.x?

https://bugreports.qt-project.org/browse/QTBUG-38857
https://bugreports.qt-project.org/browse/QTBUG-36707
None of bugs fixed even in Qt5.3. Looks like it will be long story.

You can try:

  1. To use phantomjs with option "--load-images=true".
  2. Not to use phantomjs for sites with video elements.

Maybe it helps.

FYI, I can verify that there is still a significant memory leak when using
the "--load-images=true" option. I have not contrasted it VS not using
the switch, so maybe it helps, maybe not.

FYI Right now to consistently render up to 10 pages without running out of
ram I need at least 500mb allocated to each instance. (I run a SaaS using
PhantomJs v1.9.7 so I think this is a pretty reliable metric)

On Tue, Jul 1, 2014 at 1:34 PM, veveve notifications@github.com wrote:

https://bugreports.qt-project.org/browse/QTBUG-38857
https://bugreports.qt-project.org/browse/QTBUG-36707
None of bugs fixed even in Qt5.3. Looks like it will be long story.

You can try:

  1. To use phantomjs with option "--load-images=true".
  2. Not to use phantomjs for sites with video elements.

Maybe it helps.


Reply to this email directly or view it on GitHub
#11390 (comment).

Would love to see a fix.

Any fix now? Try all the solutions above, no help, issue still.

same problem on v1.9.8

Hi All,,

I am also having the same problem: my PhantomJs crashes as soon as a certain number of karma test cases get executed. After going through lots of blogs and articles I understood that memory leaks can only be handled by using page.close() in server.js of PhantomJs.

I am not able to figure out how to find that server.js file of PhantomJs, as we use the launcher-configuration dependency to get phantom.

Will really be thankful to you..

This looks like a duplicate issue:
#12903

Guys,

Still I m on the same step can anyone suggest me the best possible solution.

Will really be thankful to you..

+1, I'm also facing this problem.
I'm using Phantom to run my unit tests, each test run eats another ~100 MB of memory.

Also running into the issue; I get an OutOfMemory exception when this runs

+1 for me. I got the same issue.

  • Either the memory leaks and in 2 hours the phantomjs process crashes. (with page.settings.loadImages= false)
  • Or, If I set (page.settings.loadImages = true), it simply hangs after a day of running. Does not respond anymore...but the memory leakage is better.

See my post (with dmp file #13210)
Any solution, thanks.

+1. Using phantom in production & it's crashing frequently due to memory leaks. Are there any "hacky" workarounds for now that we can use to mitigate this issue? Thanks in advance.

It's killing me. At this point I've decided to just kill it and restart it
every time it runs through.

On Wed, Jun 10, 2015 at 7:56 PM Blake Jennings notifications@github.com
wrote:

+1. Using phantom in production & it's crashing frequently due to memory
leaks. Are there any "hacky" workarounds for now that we can use to
mitigate this issue? Thanks in advance.


Reply to this email directly or view it on GitHub
#11390 (comment).

+1 here.

This is reproducible even with 2.0.0. I guess restart is the only option.

commented

Happy birthday issue! You are 2 years old now.
+1

This is not a memory leak.

Tried working with page.close() within onLoadFinished callback and without it as well. Stuck at it!!

same problem #moroccan-dude

@vitallium care to elaborate?

commented

Any news?

Yes. rebuild in master branch solved the problem.

[image: image]

Phan Thanh An
In charge of IT

HR Dept.
TRACODI

' + 84 8 0902 817 205

89 Cach Mang Thang Tam St.

Ben Thanh Ward, Dist. 1, HCMC

Fax: + 84 8 38330317

Website: www.tracodi.com.vn http://tracodi.com.vn/

On Thu, Dec 10, 2015 at 2:27 PM, BYaka notifications@github.com wrote:

Any news?


Reply to this email directly or view it on GitHub
#11390 (comment).

commented

@dailystore commented on 13 дек. 2015 г., 14:02 GMT+3:

Yes. rebuild in master branch solved the problem.

Can you please link to commit, that solve problem?

@byaka you can try to download the branch 2.0 at https://github.com/ariya/phantomjs/archive/2.0.zip
or the branch master at https://github.com/ariya/phantomjs/archive/master.zip
P/S: I don't remember the branch version when I updated. It was the master branch that time. The repository has changed lately.

Here are a couple VM maps taken about 20 minutes apart on the way toward Memory Exhausted.
See #14143. This is with locally-built casper and phantom, which I think are current except for the [wip].

@vitallium I hope you can make something of them. Let me know how else I can help.

VM maps.zip

@mepard thanks! Looking! (also, a memory dump would be useful too (saved with procdump)). This dump will help us to determine what exactly is not disposed. Right now I see a huge amount of small blocks (124 Kb each) that are not freed.

Thank you @mepard (once again)!
Upstream issue: https://bugs.webkit.org/show_bug.cgi?id=65768

@vitallium You're most welcome. Thanks for identifying the bug (which seems to fit). I'll grab a procdump on my next run. I'm also trying to build WebKit with !NDEBUG so it'll report ref-count leaks.

@vitallium. You were right! For phantomjs.exe win10 allocated no more than 1gb RAM. After patching phantomjs.exe, he began to use more than 1gb RAM, and Memory exhausted not there , but there was another problems . Performance low and there was a bug with [error] [phantom] Wait timeout of 5000ms expired, exiting.Wait timeout of 5000ms expired, exiting.

@vitallium The dmp file is too big even when zipped.

@vitallium I finally managed to build a debug version on Windows and reproduced the Memory Exhausted. Here's the call stack:

phantomjs.exe!WTFCrash() Line 345 C++
phantomjs.exe!WTF::fastRealloc(void * p, unsigned int n) Line 392 C++
phantomjs.exe!WTF::StringImpl::reallocateInternal<wchar_t>(WTF::PassRefPtrWTF::StringImpl originalString, unsigned int length, wchar_t * & data) Line 231 C++
phantomjs.exe!WTF::StringImpl::reallocate(WTF::PassRefPtrWTF::StringImpl originalString, unsigned int length, wchar_t * & data) Line 246 C++
phantomjs.exe!WTF::StringBuilder::reallocateBuffer<wchar_t>(unsigned int requiredLength) Line 167 C++
phantomjs.exe!WTF::StringBuilder::appendUninitializedSlow<wchar_t>(unsigned int requiredLength) Line 232 C++
phantomjs.exe!WTF::StringBuilder::appendUninitialized<wchar_t>(unsigned int length) Line 218 C++
phantomjs.exe!WTF::StringBuilder::append(const wchar_t * characters, unsigned int length) Line 275 C++
phantomjs.exe!JSC::appendStringToStringBuilder<wchar_t>(WTF::StringBuilder & builder, const wchar_t * data, int length) Line 265 C++
phantomjs.exe!JSC::Stringifier::appendQuotedString(WTF::StringBuilder & builder, const WTF::String & value) Line 315 C++
phantomjs.exe!JSC::Stringifier::appendStringifiedValue(WTF::StringBuilder & builder, JSC::JSValue value, JSC::JSObject * holder, const JSC::PropertyNameForFunctionCall & propertyName) Line 384 C++
phantomjs.exe!JSC::Stringifier::Holder::appendNextProperty(JSC::Stringifier & stringifier, WTF::StringBuilder & builder) Line 556 C++
phantomjs.exe!JSC::Stringifier::appendStringifiedValue(WTF::StringBuilder & builder, JSC::JSValue value, JSC::JSObject * holder, const JSC::PropertyNameForFunctionCall & propertyName) Line 424 C++
phantomjs.exe!JSC::Stringifier::stringify(JSC::Handle value) Line 249 C++
phantomjs.exe!JSC::JSONProtoFuncStringify(JSC::ExecState * exec) Line 822 C++
[External Code]
[Frames below may be incorrect and/or missing]
phantomjs.exe!JSC::JITCode::execute(JSC::JSStack * stack, JSC::ExecState * callFrame, JSC::VM * vm) Line 135 C++
phantomjs.exe!JSC::Interpreter::executeCall(JSC::ExecState * callFrame, JSC::JSObject * function, JSC::CallType callType, const JSC::CallData & callData, JSC::JSValue thisValue, const JSC::ArgList & args) Line 1026 C++
phantomjs.exe!JSC::call(JSC::ExecState * exec, JSC::JSValue functionObject, JSC::CallType callType, const JSC::CallData & callData, JSC::JSValue thisValue, const JSC::ArgList & args) Line 40 C++
phantomjs.exe!WebCore::JSMainThreadExecState::call(JSC::ExecState * exec, JSC::JSValue functionObject, JSC::CallType callType, const JSC::CallData & callData, JSC::JSValue thisValue, const JSC::ArgList & args) Line 56 C++
phantomjs.exe!WebCore::ScheduledAction::executeFunctionInContext(JSC::JSGlobalObject * globalObject, JSC::JSValue thisValue, WebCore::ScriptExecutionContext * context) Line 111 C++
phantomjs.exe!WebCore::ScheduledAction::execute(WebCore::Document * document) Line 133 C++
phantomjs.exe!WebCore::ScheduledAction::execute(WebCore::ScriptExecutionContext * context) Line 82 C++
phantomjs.exe!WebCore::DOMTimer::fired() Line 129 C++
phantomjs.exe!WebCore::ThreadTimers::sharedTimerFiredInternal() Line 132 C++
phantomjs.exe!WebCore::ThreadTimers::sharedTimerFired() Line 106 C++
phantomjs.exe!WebCore::SharedTimerQt::timerEvent(QTimerEvent * ev) Line 114 C++
phantomjs.exe!QObject::event(QEvent * e) Line 1221 C++
phantomjs.exe!QApplicationPrivate::notify_helper(QObject * receiver, QEvent * e) Line 3716 C++
phantomjs.exe!QApplication::notify(QObject * receiver, QEvent * e) Line 3160 C++
phantomjs.exe!QCoreApplication::notifyInternal(QObject * receiver, QEvent * event) Line 965 C++
phantomjs.exe!QCoreApplication::sendEvent(QObject * receiver, QEvent * event) Line 224 C++
phantomjs.exe!QEventDispatcherWin32::event(QEvent * e) Line 1218 C++
phantomjs.exe!QApplicationPrivate::notify_helper(QObject * receiver, QEvent * e) Line 3716 C++
phantomjs.exe!QApplication::notify(QObject * receiver, QEvent * e) Line 3160 C++
phantomjs.exe!QCoreApplication::notifyInternal(QObject * receiver, QEvent * event) Line 965 C++
phantomjs.exe!QCoreApplication::sendEvent(QObject * receiver, QEvent * event) Line 224 C++
phantomjs.exe!QCoreApplicationPrivate::sendPostedEvents(QObject * receiver, int event_type, QThreadData * data) Line 1593 C++
phantomjs.exe!QEventDispatcherWin32::sendPostedEvents() Line 1241 C++
phantomjs.exe!QWindowsGuiEventDispatcher::sendPostedEvents() Line 84 C++
phantomjs.exe!qt_internal_proc(HWND__ * hwnd, unsigned int message, unsigned int wp, long lp) Line 416 C++
[External Code]
phantomjs.exe!QEventDispatcherWin32::processEvents(QFlags flags) Line 818 C++
phantomjs.exe!QWindowsGuiEventDispatcher::processEvents(QFlags flags) Line 74 C++
phantomjs.exe!QEventLoop::processEvents(QFlags flags) Line 129 C++
phantomjs.exe!QEventLoop::exec(QFlags flags) Line 204 C++
phantomjs.exe!QCoreApplication::exec() Line 1229 C++
phantomjs.exe!QGuiApplication::exec() Line 1528 C++
phantomjs.exe!QApplication::exec() Line 2977 C++
phantomjs.exe!inner_main(int argc, char * * argv) Line 73 C++
phantomjs.exe!main(int argc, char * * argv) Line 89 C++
[External Code]

@vitallium I also did two limited runs of 20 and 40 pages to see what the reported leaks were at the end. In both cases, it reported 6 CachedResource leaks and nothing else. I suspect the problem in my case is my own accumulated data (an array of objects) combined with the upstream WebKit bug.

Building with /LARGEADDRESSAWARE in QMAKE_LFLAGS prevents Memory Exhausted in my case. It doesn't seem to decrease performance or change test failures.

@mepard I think the problem is in the main execution context. Because it is a Web Page instance. And because of that closing web pages have no effect on the cache.

@vitallium.
I uninstall phantomjs for windows and install linux mint 17 64x and phantomjs 2.1.1 for linux 86_64x. After this i run my script and i was happy all 200 pages was load, but after 20 minutes phantomjs crashed(((((( and again trouble with memory(((

I happened to find out a little about the Memory Loop on Phantomjs 2.1.
Lets say I have a script which visits 2 pages. I saw the memory loop when the program was visiting first page and somehow the command for visiting 2nd page was there.
I still wonder why phantomjs was not fixed even after 3 years of this issue with 28 participants and why no one was assigned.

Any hope ?

cycle your phantomjs process every 50 requests or so. That's what I did for my SaaS (PhantomJsCloud) and my customers are happy.

A big part of the bugs with PhantomJs seem to be due to it's sometimes allergic interactions with QT. Because of their inter-dependencies I don't think we'll get this problem solved.

@jasonswearingen Thanks for your reply, but what do you mean by cycle ?
sorry if i'm asking something stupid...

btw i saw your SaaS previously, but i can't use it because i'm working with some devices in a local network

commented

@Xsmael, i think he mean "restart" ;)

By cycle I mean restart your phantom process after every so many calls.
This won't work as nicely if you need to store state between calls, but
otherwise it seems (from my experience) the proper mitigation

  • Sent from my mobile

On Nov 20, 2016 11:47 PM, "Ismael OUEDRAOGO" notifications@github.com
wrote:

@jasonswearingen https://github.com/jasonswearingen Thanks for your
reply, but what do you mean by cycle ?
sorry if i'm asking something stupid...

btw i saw your SaaS previously, but i can't use it because i'm working
with some devices in a local network


You are receiving this because you were mentioned.
Reply to this email directly, view it on GitHub
#11390 (comment),
or mute the thread
https://github.com/notifications/unsubscribe-auth/AAxsNgYB4-YwwipcmqE7YoBbFxMRisU3ks5rAUybgaJpZM4AtnDf
.

While restarting Phantom seems the solution, you guys should understand that there is the need of automation to shorten the time of execution. If you restart the process and restore some state somehow, you'll still have to pay a lot for that small restart by huge time. It's like comparing 100ms to 10seconds depending on the machine configuration. I've also tried other solutions like adding some queue process and making the whole thing sync instead of any kind of async process.

Restarting and Adding a better Queue might be pretty good except that no one was assigned to this bug no matter what and I've switched to NightmareJS just because of this silly bug. I found out that using alternative might be the best solution to our base problem. I don't think I (or We) do have the skill and resource to debug the whole thing, so I chose the best way for me.

Now, the only hope is to devote our time to make it better, somehow that is.

@entrptaher So using NightmareJS can you handle 200 pages simultaneously for a long period of time (or for ever!) without the need to "cycle" ?

@Xsmael Both for Phantomjs and Nightmarejs, if you have enough resources, you can do it.
Only that this version of phantomjs had memory leak for years. :/

+1 here

@entrptaher @jasonswearingen @byaka how do you "restart" a phantom process? I couldn't find anything in the docs. It would also be nicer if the docs on the main website could be searched.

Are there any fix for this problem? This issue was opened 4 years ago

@joshterrill you just need to exit the program and start it again. use phantom.exit(0) doc'd here: http://phantomjs.org/api/phantom/method/exit.html

@edgarmarkosov it seems like there is a good enough workaround for this actually, so honestly, while it would be nice to get this fixed, there are plenty of other bugs I'd personally vote to get fixed before this

like:

  • onResourceReceived not getting post body,
  • pdf's not containing internal links/bookmarks,
  • webfonts crashing phantom,
  • installed system fonts being ignored,
  • etc etc etc

My solution to this problem was:
1. Set page.settings.clearMemoryCaches = true;
2. Use double setTimeout for page.close()

                // Deferred close: wait 500 ms, then one further 1 ms timer,
                // before calling page.close().
                setTimeout(function() {
                     setTimeout(function() {
                          page.close();
                     }, 1);
                }, 500);
3. Call `page.clearMemoryCache();` before `page.open()`
4. Set  `--load-images=yes` when starting phantomJS process.

@jasonswearingen this workflow helped me to solve problem with page open memory leak in my phantomJS process. Before this workflow memory was reaching 2GB, now memory of my phantomJS process does not reach 150MB. I think that this is only workaround of this problem, because manual releasing of memory is not good solution, and maybe this is not working for all platforms.

Hey,

I'm going to need more info. Here's my code. A lot has been added for testing and some doesn't really make much sense to me. For example, I have tried waiting 5 seconds after clearing the cache before the page is opened and again before it is closed.
Closing the page appears to free some memory, but not all of it. Each time I load a page it takes ~20mb and when the page is closed I get ~10mb back. So each page adds 10mb to the total memory used until the memory is exhausted and the script stops.

Is there something I'm missing?

// Queue of node ids still to visit; next_page() shifts one entry off per page.
var linkList = ["1","2","3"]; // there are thousands of these I need to go through.
// Prefix that each node id is appended to in order to form the page URL.
var urlBase = 'http://test.com/node/';
// Progress counters used only in the "Success ..." log line; linksFinished
// starts at 1 and is incremented after each completed page.
var linksFinished = 1;
var totalLinks = linkList.length;

function handle_page(url){
	page = require('webpage').create();
	page.settings.clearMemoryCaches = true;
	page.viewportSize = { width: 1200, height: 1000 };
	console.log('Setting error handling');
	page.onConsoleMessage = function (msg) {
		console.log(msg);
	};
	page.onError = function(msg, trace) {
		var msgStack = ['ERROR: ' + msg];
		if (trace && trace.length) {
			msgStack.push('TRACE:');
			trace.forEach(function(t) {
				msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
			});
		}
		// uncomment to log into the console 
		console.error(msgStack.join('\n'));
	};
	console.log('Error handling is set');
	page.clearMemoryCache();
	console.log('memory Cleared before opening page');
	setTimeout(function() {
		 setTimeout(function() {
			page.open(url, function (status) {
				if (status !== 'success') {
					console.log('Unable to load the address! Trying the next one...');
					next_page();
				} else {
					console.log('Opening page '+ url);
					page.includeJs('http://test.com/jquery/jquery-1.8.3.min.js');
					initialize();
				}
			});
		 }, 1);
	}, 5000);
}
		
/**
 * Takes the next id off linkList and starts processing it, or exits
 * PhantomJS with code 0 once the list is exhausted.
 */
function next_page(){
	var link = linkList.shift();
	totalLinks = linkList.length;
	if(!link){
		console.log('link is undefined, we are all done... exiting');
		phantom.exit(0);
		// Do not rely on phantom.exit() halting this function: return
		// explicitly so no page is opened for ".../undefined".
		return;
	}
	// Build the URL only once a link is known to exist.
	var url = urlBase.concat(link);
	console.log('URL: ' + url);
	handle_page(url);
}

/**
 * Drives one loaded page: clicks the "map-download" element inside the page,
 * polls until the "fileRequest" element is marked as finished, then closes
 * the page and schedules the next link.
 */
function initialize() {
	// The next two functions are handed to page.evaluate(), so they must not
	// reference anything from this script's scope.
	var clickMapDownload = function() {
		console.log('Starting map click');
		mapDownloadLink = document.getElementById('map-download');
		mapDownloadLink.click();
	};
	var isImageFinished = function() {
		if(document.getElementById('fileRequest').className !== "FinishedImage")
		{
			console.log('not done yet');
			return;
		}
		console.log("It is done!");
		return true;
	};

	// Polled by waitFor(): asks the page whether the image is finished.
	var pollPage = function() {
		console.log('waiting for image to be done.');
		return page.evaluate(isImageFinished);
	};

	// Innermost step of the delayed hand-over to the next link.
	var continueWithNextLink = function() {
		next_page();
	};

	// Called by waitFor() once pollPage() reports completion.
	var finishPage = function() {
		console.log('Success ' + linksFinished + ' Links are done and ' + totalLinks + ' to go!');
		linksFinished++;
		page.clearMemoryCache();
		page.close();
		setTimeout(function() {
			setTimeout(continueWithNextLink, 1);
		}, 5000);
	};

	page.evaluate(clickMapDownload);
	waitFor(pollPage, finishPage, 20000);
}
next_page();

I also ran into this issue, after some investigation, I found one source of the leak come from Event#initCustomEvent. CustomEvent is not supported in PhantomJS, so most of the libraries polyfill it with the old fashion initCustomEvent. In case you pass a detail object to this function, that object is leaked.

This is the minimal page I created to reproduce the leak.

<!-- Minimal repro page: a detail object ({}) is passed as the fourth argument
     of initCustomEvent; the reporter states that this object is leaked. -->
<html>
  <body>
    <div class='container'></div>
    <script>
      var event = document.createEvent("CustomEvent");
      event.initCustomEvent("testingtheevent", false, false, {});
      window.dispatchEvent(event);
    </script>
  </body>
</html>

Fortunately, the leak is only for the last parameter of initCustomEvent, the following code looks fine

<!-- Variant of the repro page that the reporter describes as not leaking:
     initCustomEvent gets no detail argument and detail is assigned afterwards. -->
<html>
  <body>
    <div class='container'></div>
    <script>
      var event = document.createEvent("CustomEvent");
      event.initCustomEvent("testingtheevent", false, false);
      event.detail = {};
      window.dispatchEvent(event);
    </script>
  </body>
</html>

I'm not familiar with PhantomJS code base, and cannot locate the implementation of initCustomEvent. It would be great if the maintainer can help get this leak fixed.

A work around is to inject a polyfill in advance.

/**
 * CustomEvent replacement that never passes `detail` to initCustomEvent.
 *
 * @param {string} type Event name.
 * @param {Object} [params] Optional settings: `bubbles`, `cancelable`, `detail`.
 * @returns {Event} The initialised event; `detail` is set only when supplied.
 */
function CustomEvent(type, params) {
  var e = document.createEvent('CustomEvent');
  var bubbles = Boolean(params && params.bubbles);
  var cancelable = Boolean(params && params.cancelable);

  // Only three arguments on purpose: the detail payload is attached below
  // instead of being passed as the fourth argument.
  e.initCustomEvent(type, bubbles, cancelable);
  // Compare against undefined rather than testing truthiness, so falsy
  // payloads such as 0, "", false or null are not silently dropped.
  if (params && typeof params.detail !== 'undefined') {
    e.detail = params.detail;
  }
  return e;
}

My solution was to create a text file with a list of URLs that I wanted to run the script on, then create a SECOND file that each URL gets written to when PhantomJS has processed it. My PhantomJS script only cycles through URLs that are in the first document but not found in the second.

Then I created a Windows script that checks whether PhantomJS is still running. If PhantomJS hits the memory error, it crashes and closes. The Windows script waits about a minute between checks by pinging home (127.0.0.1).

Here's the windows script:

@echo off
REM Watchdog loop: about once a minute, check whether phantomjs.exe is
REM running and, if it is not, start it again with test.js.

REM /d is required for CD to switch the current drive; a bare "CD D:" only
REM prints D:'s current directory and leaves the working drive unchanged.
CD /d D:
Set "MyApplication=C:\Users\myname\AppData\Local\Microsoft\WindowsApps\phantomjs.exe"
Set "MyProcess=phantomjs.exe"
Color 9B
Title Check phantomjs is running "%MyProcess%"
mode con cols=75 lines=2
:start
REM find sets errorlevel 0 when the process name appears in the task list.
tasklist /nh /fi "imagename eq %MyProcess%" 2>nul |find /i "%MyProcess%" >nul
If not errorlevel 1 (Echo "%MyProcess%" is running) else (start "" "%MyApplication%" test.js)
REM 60 pings to localhost serve as a delay of roughly one minute between checks.
ping -n 60 127.0.0.1 >nul 
goto start

My phantomjs script in this example is called test.js

memory leak still exists

I solved it by closing the instance every time my casperjs program load is finished, I put my code in nodejs, then somebody serves

// Node.js launcher: runs the CasperJS script demo2.js in a fresh child process
// for each batch of screens, so every batch starts with a new instance.
const cp = require("child_process");

// Number of screens handled by one casperjs run (--ini .. --fin inclusive).
const BATCH_SIZE = 20;

/**
 * Simulated asynchronous lookup of the users to process.
 *
 * @returns {Promise<Array<{id: string, ini: number}>>} Resolves after 600 ms
 *     with each user's id and first screen number.
 */
const getUsers = () =>
  new Promise((resolve) => {
    setTimeout(
      () =>
        resolve([
          { id: "champeta", ini: 1000 },
          { id: "negocios", ini: 1000 },
          { id: "empresa", ini: 1000 },
          { id: "colon", ini: 1000 },
        ]),
      600
    );
  });

// Run-wide settings; only `limite` (the last screen number to process) is
// read by the code below.
let todos = {
  pass: "",
  inicio: 1000,
  limite: 9999,
  pantallas: 50,
};

/**
 * Fetches the user list and starts one batch chain per user.
 */
const Start = async () => {
  const users = await getUsers();
  // forEach rather than map: the calls are made for their side effects only.
  users.forEach((user) => {
    console.log(user.id + " " + user.ini);
    Ejecutar(user.id, user.ini);
  });
};
Start().catch((err) => {
  // Report a failed start-up instead of leaving an unhandled rejection.
  console.error(err);
});

/**
 * Runs one casperjs batch for `usuario` starting at screen `inicio`, then
 * chains the next batch until `todos.limite` is passed.
 *
 * @param {string} usuario User id passed to demo2.js as --user.
 * @param {number|string} inicio First screen number of this batch.
 */
function Ejecutar(usuario, inicio) {
  const first = Number.parseInt(inicio, 10);
  const last = first + BATCH_SIZE - 1;
  const params = [
    "demo2.js",
    "--user=" + usuario,
    "--ini=" + inicio,
    "--fin=" + last.toString(),
  ];
  console.log("Ejecutando..");
  console.log(params);

  cp.execFile("casperjs", params, {}, function (error, stdout, stderr) {
    // Surface failures instead of dropping them; the chain still continues,
    // as before, so one bad batch does not stop the remaining ones.
    if (error) {
      console.error("casperjs failed for " + usuario + ": " + error.message);
    }
    if (stderr) {
      console.error(stderr);
    }
    console.log(stdout);
    console.log(usuario);
    if (last <= todos.limite) {
      Ejecutar(usuario, first + BATCH_SIZE);
    }
  });
}

Implementing this is a major undertaking and currently beyond the scope of the project.
You are however free to work on it on your own or sponsor someone to implement it. Once it is in a state where we can consider it for inclusion, please reopen this issue and submit a pull request of the implementation. Thank you!