PhantomJS Node - page.open - cannot keep track of multiple pages

£可爱£侵袭症+ 提交于 2019-12-20 04:26:13

问题


I'm using Phantom Node to interface node with PhantomJS. I'm trying to open pages in parallel, but the issue is that page.open callback function does not pass back the reference to the page, so I don't have a way to know which page has completed.

Relevant Code

self.queue[j].page.open.call( self.queue[j].page, rows[i].url, function( status )
{
   console.log( this ) // <-- returns undefined
   // So how do I keep track of which pages have finished loading?
   // The only variable I have available here is `status`
});

Full Function Code:

SnapEngine.prototype.processSnaps = function( rows, type )
{
var self = this;

if ( ! rows || rows.length === 0 ) return true;

for( var i = 0; i < rows.length; i++ )
{
    // If queue is full, stop processing and wait for next snap engine iteration
    if ( self.getAvailableSizeInQueue() <= 0 )
    {
        self.logger.info( 'Queue is full for signature snap processing' );
        return true;
    }

    // Snapshots are processed by url, if multiple duplication urls are requested, all are updated after one of them is complete
    // So if a url is already being processed, don't reprocess it
    if ( self.findUrlInQueue( rows[i].url ) !== false )
    {
        self.logger.info( 'URL already being processed', url );
        continue;
    }

    for( j = 0; j < self.queue.length; j++ )
    {
        // Find an unused page object
        if ( self.queue[j] && self.queue[j].hasOwnProperty( 'page' ) && ( ! self.queue[j].page.url || self.queue[j].page.url == '' ))
        {
            self.logger.info( 'Opening URL in browser', rows[i].url );

            // Start loading page
            self.queue[j].page.open.call( self.queue[j].page, rows[i].url, function( status )
            {
                // ===== ISSUE HERE =====
                var url = this.url; // <-- this is undefined
                // ======================

                self.resetPage( self.queue[ index ]);

                if ( status === 'success' )
                {
                    self.updateStatus( url, 'ready' );
                }
                else
                {
                    self.updateStatus( url, 'failed' );
                }

                self.removeUrlFromQueue( url )
            });

            self.updateStatus( rows[i].url, 'processing' );
            break;
        }
    }
}
}

回答1:


Try it like this:

I added a function that is directly executed around the part that opens the page, thus introducing a new scope. Therefore url won't get mangled (you cannot use rows[i].url as i will change before your callback is called) and will be available in your callback.

for( j = 0; j < self.queue.length; j++ )
{
    // Find an unused page object
    if ( self.queue[j] && self.queue[j].hasOwnProperty( 'page' ) && ( ! self.queue[j].page.url || self.queue[j].page.url == '' ))
    {
        self.logger.info( 'Opening URL in browser', rows[i].url );
        (function() {
            var url = rows[i].url;
            // Start loading page
            self.queue[j].page.open.call( self.queue[j].page, url, function( status )
            {                   
                self.resetPage( self.queue[ index ]);

                if ( status === 'success' )
                {
                    self.updateStatus( url, 'ready' );
                }
                else
                {
                    self.updateStatus( url, 'failed' );
                }

                self.removeUrlFromQueue( url )
            });
        })();

        self.updateStatus( rows[i].url, 'processing' );
        break;
    }
}


来源:https://stackoverflow.com/questions/15086360/phantomjs-node-page-open-cannot-keep-track-of-multiple-pages

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!