NodeJS: Trouble scraping two URLs with promises

前端 未结 1 1189
一向
一向 2020-12-07 04:13

I'm scraping r/theonion and writing the titles to a text file, onion.txt. After that, I intend to scrape r/nottheonion and write the titles to a text file, nottheon…

相关标签:
1条回答
  • 2020-12-07 04:47

    If you want to use promises anyway, use request-promise and fs-promise to simplify your code, and use a function to avoid repeating yourself.

    var rp = require('request-promise');
    var fsp = require('fs-promise');
    var cheerio = require('cheerio');
    
    // Subreddit listing pages whose post titles are scraped.
    const onion_url = 'https://www.reddit.com/r/theonion';
    const not_onion_url = 'https://www.reddit.com/r/nottheonion';
    
    /**
     * Extract the post titles from a listing page.
     *
     * Loads the markup with `cheerio` (which must be in scope), visits every
     * `div.link` directly under `div#siteTable`, logs each trimmed title to
     * the console, and returns all titles as one string.
     *
     * @param {string} html - Markup handed to `cheerio.load`.
     * @returns {string} The titles, each followed by a newline ('' if none).
     */
    function parse(html) {
        var $ = cheerio.load(html);
        var lines = [];
        $("div#siteTable > div.link").each(function(idx, el) {
            // `el` is the same element cheerio binds to `this` in this callback.
            var title = $(el).find('p.title > a.title').text().trim();
            console.log(title);
            lines.push(title + '\n');
        });
        return lines.join('');
    }
    
    /**
     * Build a writer bound to one file.
     *
     * @param {string} file - Path passed to `fsp.appendFile`.
     * @returns {function(*): *} Function that takes the content to append and
     *   returns whatever `fsp.appendFile(file, content)` returns.
     */
    function append(file) {
        return function appendTo(content) {
            return fsp.appendFile(file, content);
        };
    }
    
    // Each pair is [page URL, file the parsed titles are appended to].
    var targets = [
      [onion_url, 'onion.txt'],
      [not_onion_url, 'not_onion.txt']
    ];

    // Start one request -> parse -> append chain per pair. The chains are not
    // awaited on one another, and each one logs its own outcome.
    targets.forEach(function(target) {
      rp(target[0])
        .then(parse)
        .then(append(target[1]))
        .then(function() { console.log('Success'); })
        .catch(function(err) { console.log('Error:', err); });
    });
    

    This is not tested.

    0 讨论(0)
提交回复
热议问题