How do I scrape a table from the provided website using CasperJS?

此生再无相见时 提交于 2019-12-11 06:07:46

问题


The final goal is to retrieve stock data in table form from the provided broker website and save it to a text file. Here is the code that I have managed to put together so far by reading a few tutorials:

// Create the CasperJS instance used by the rest of the script.
var casper = require("casper").create();
// Page to scrape; the query string carries active_id, tz_offset and date parameters.
var url = 'https://iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';

// Timeout handler: print a farewell message, then call exit() on whatever
// echo() hands back.
var terminate = function() {
    var afterEcho = this.echo("Exiting ...");
    afterEcho.exit();
};

// Callback that looks up the quotes table and echoes what it finds.
// NOTE(review): `document` is referenced directly here rather than inside
// casper.evaluate(); presumably this function runs in the CasperJS script
// context and not in the loaded page, which would explain the 'undefined'
// output -- confirm against the CasperJS evaluate() documentation.
var processPage = function() {

    var rows = document.querySelectorAll('#mCSB_3_container > table'); // matches <table> elements, not rows (selector copied via "Copy selector" in Chrome DevTools)
    //var nodes = document.getElementsByClassName('mCSB_container');

    this.echo(rows);
    this.echo(rows.length);
    for (var i = 0; i < rows.length; i++)
    {
        // querySelector yields only the first matching date cell inside rows[i]
        var cell = rows[i].querySelector('.quotes-table-result__date');
        this.echo(cell); // echoes the element object itself, not its text
    }  

};

// Open the page, then wait for '#mCSB_3_container': processPage is passed as
// the callback for when the selector appears, terminate as the timeout callback.
casper.start(url);
casper.waitForSelector('#mCSB_3_container', processPage, terminate);
// Start executing the queued steps.
casper.run();

This code should retrieve the stock price table and print out each cell. However, all I get is 'undefined', which likely means that the querySelector call returned no objects. Please assume that I don't know any web programming (HTML, CSS).


回答1:


First of all, one problem is that the waitFor was not set up well: you have to wait for the rows/cells.
The nodes you get out of this page are a bit weird; if anybody has a more abstract solution where childNodes are handled better than in my solution, I would be really interested:

// Create the CasperJS instance used by the rest of the script.
var casper = require('casper').create();
// Page to scrape (note the eu. subdomain in this version of the URL).
var url = 'https://eu.iqoption.com/en/historical-financial-quotes?active_id=1&tz_offset=60&date=2016-12-19-21-59';
// Row count shared between steps: assigned in a later step, read by the printing loop.
var length;

casper.start(url);

// Wait for a table row (not just the container) to match this selector.
casper.then(function() {
    this.waitForSelector('#mCSB_3_container table tbody tr');
});

/**
 * Read the trimmed text of one node of a table row from the loaded page.
 *
 * The lookup is performed through casper.evaluate(), with `row` and `cell`
 * forwarded as extra arguments to the evaluated function.
 *
 * @param {number} row  Zero-based index into the 'table tbody tr' matches.
 * @param {number} cell Zero-based index into that row's childNodes.
 * @returns {?string} Trimmed innerText of the node, or null when the row or
 *     the node does not exist.
 */
function getCellContent(row, cell) {
    // Keep the result in a local: an undeclared assignment here would create
    // an implicit global (and throws a ReferenceError in strict mode).
    var cellText = casper.evaluate(function(row, cell) {
        var tableRow = document.querySelectorAll('table tbody tr')[row];
        var node = tableRow ? tableRow.childNodes[cell] : null;
        if (!node) {
            // Out-of-range row/cell: report "no value" instead of throwing
            // inside the evaluated function.
            return null;
        }
        return node.innerText.trim();
    }, row, cell);
    return cellText;
}

// Step: count the table rows and store the count in the shared `length`.
// The count is computed inside the page and only the number is returned,
// because values coming back from evaluate() must be JSON-serializable;
// DOM nodes / NodeLists are not reliably transferable.
casper.then(function() {
    var rowCount = casper.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    // Fall back to 0 if evaluate() produced no value, so the later loop
    // simply prints nothing.
    length = rowCount || 0;
    this.echo("table length: " + length);
});

// Step: echo every row, one line per column. The column prefixes are listed
// in the same order as the node indexes handed to getCellContent.
casper.then(function() {
    var prefixes = ["Date: ", "Bid: ", "Ask: ", "Quotes: "];
    for (var rowIndex = 0; rowIndex < length; rowIndex++) {
        for (var column = 0; column < prefixes.length; column++) {
            this.echo(prefixes[column] + getCellContent(rowIndex, column));
        }
    }
});

casper.run();


来源:https://stackoverflow.com/questions/41273739/how-do-scrape-table-from-the-provided-website-using-casperjs

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!