Download a file using Nightmare

冷暖自知 提交于 2019-11-28 23:55:06

PhantomJS (and CasperJS and Nightmare) don't trigger a download (dialog) when you click on something that should be downloaded. So, it is necessary to download it yourself. If you can find out the URL of the file, then it can be easily downloaded using an XMLHttpRequest from the page context.

So you need to exchange

.click('a[href="/digitaleeditie/helekrant/epub/nrc_20141124.epub"]')

for

.evaluate(function ev(){
    var el = document.querySelector("[href*='nrc_20141124.epub']");
    var xhr = new XMLHttpRequest();
    xhr.open("GET", el.href, false);
    xhr.overrideMimeType("text/plain; charset=x-user-defined");
    xhr.send();
    return xhr.responseText;
}, function cb(data){
    var fs = require("fs");
    fs.writeFileSync("book.epub", data, "binary");
})

You can also use the newer way of requesting binary data.

.evaluate(function ev(){
    var el = document.querySelector("[href*='.pdf']");
    var xhr = new XMLHttpRequest();
    xhr.open("GET", el.href, false);
    xhr.responseType = "arraybuffer";
    xhr.send();

    var bytes = [];
    var array = new Uint8Array(xhr.response);
    for (var i = 0; i < array.length; i++) {
        bytes[i] = array[i];
    }
    return bytes;
}, function cb(data){
    var fs = require("fs");
    fs.writeFileSync("book.epub", new Buffer(data), "binary");
})

Both approaches are described on MDN. Here is a sample script which shows a proof of concept.

There is a Nightmare download plugin. With it, you can download the file using just the code below:

var Nightmare = require('nightmare');
require('nightmare-download-manager')(Nightmare);
var nightmare = Nightmare();

// When a download starts, answer with the path the file should be saved to.
nightmare.on('download', function(state, downloadItem){
  if(state === 'started'){
    nightmare.emit('download', '/some/path/file.zip', downloadItem);
  }
});

// Click the archive link, wait for every download to finish, then report.
nightmare
  .downloadManager()
  .goto('https://github.com/segmentio/nightmare')
  .click('a[href="/segmentio/nightmare/archive/master.zip"]')
  .waitDownloadsComplete()
  .then(() => {
    console.log('done');
  })
  .catch((err) => {
    // Without a rejection handler a failed step would go unreported.
    console.error(err);
  });

I got my downloads working very easily using the request module, as described here.

var Nightmare = require('nightmare');
var fs = require('fs');
var request = require('request');

// Log in through the browser, open the downloads page, then fetch the EPUB
// with `request` and save it as myBook.epub.
new Nightmare()
  .goto('https://login.nrc.nl/login?service=http://digitaleeditie.nrc.nl/welkom')
  .insert('input[name="username"]', 'Username')
  .insert('input[name="password"]','Password')
  .click('button[type="submit"]')
  .wait()
  .goto('http://digitaleeditie.nrc.nl/digitaleeditie/NH/2014/10/20141124___/downloads.html')
  .wait()
  .then(function () {
    // Return a promise so a failed download rejects the chain and reaches
    // the .catch() below instead of being lost inside the callback.
    return new Promise(function (resolve, reject) {
      download('http://digitaleeditie.nrc.nl/digitaleeditie/helekrant/epub/nrc_20141124.epub', 'myBook.epub', function (err) {
        if (err) {
          reject(err);
          return;
        }
        console.log('done');
        resolve();
      });
    });
  })
  .catch(function (err) {
    console.log(err);
  });

/**
 * Streams the resource at `uri` into the file `filename`.
 *
 * @param {string} uri - URL to download.
 * @param {string} filename - Path of the file to write.
 * @param {function(Error=)} callback - Called exactly once: with an error if
 *   the HEAD request, the GET request, or the file write fails; with no
 *   arguments after the written file has been closed.
 */
function download(uri, filename, callback) {
  var settled = false;

  // Several events can signal completion (error and close); report only the first.
  function finish(err) {
    if (settled) {
      return;
    }
    settled = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  }

  // The HEAD response itself is not inspected; only its error is checked
  // before the actual download starts.
  request.head(uri, function (headErr) {
    if (headErr) {
      finish(headErr);
      return;
    }

    var file = fs.createWriteStream(filename);
    file.on('error', finish);
    file.on('close', function () {
      finish();
    });

    request(uri)
      .on('error', function (err) {
        // pipe() does not close the destination when the source fails.
        file.destroy();
        finish(err);
      })
      .pipe(file);
  });
}

Run `npm i request` to install the request module before using this script.

Nightmare will download it properly if you click on the download link.

const Nightmare         = require('nightmare');
// Show the browser window only when the first CLI argument contains "true".
// A missing argument is treated as "do not show" rather than throwing a
// TypeError on `undefined.includes`.
const showArg           = process.argv[2] || "";
const show              = showArg.includes("true");
const nightmare         = Nightmare( { show: show } );

nightmare
    .goto("https://github.com/segmentio/nightmare")
    .click('a[href="/segmentio/nightmare/archive/master.zip"]')
    .end(() => "Done!")
    .then((value) => console.log(value))
    // Report failures instead of leaving the rejection unhandled.
    .catch((err) => console.error(err));
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!