rxjs6 模拟实现爬虫

跟風遠走 提交于 2020-02-28 09:32:15

使用rxjs模拟爬虫

 

首先定义一个模拟接口请求的方法, 该方法会在 [0, 3) 秒内的随机时刻完成请求

源数据默认16条, 每组4个, 分为4组

每组内部使用 Promise.all 并发请求, 组与组之间通过 concatMap 串行执行, 因此总用时约为四组中每组最长耗时之和

const {range, from, of} = require('rxjs')
const {concatMap, reduce, tap, toArray, map, scan, finalize, bufferCount} = require('rxjs/operators')
const fs = require('fs')

/**
 * Simulates a single API request.
 *
 * After a random delay in the range [0, 3000) ms, logs the value, writes it
 * to `./tmp/<val>.txt`, and resolves.
 *
 * @param {*} val - Value to "download"; used as the file name and the file content.
 * @returns {Promise<{done: boolean, val: *, time: number}>} Resolves with the
 *   original value and the delay (in ms) that was used; rejects if the file
 *   cannot be written.
 */
function download(val) {
  const time = Math.random() * 3000
  const dir = './tmp'
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      // An exception thrown inside a timer callback would escape the promise
      // and crash the process, so route every failure through reject().
      try {
        console.log('down', val)
        // The output directory may not exist on a fresh checkout.
        fs.mkdirSync(dir, { recursive: true })
        // Newer Node versions reject non-string/non-buffer data, so convert explicitly.
        fs.writeFileSync(`${dir}/${val}.txt`, String(val))
        resolve({ done: true, val, time })
      } catch (err) {
        reject(err)
      }
    }, time)
  })
}

// Wall-clock start, compared at the end against the sum of per-batch maxima.
const st = Date.now()

// Start every download of a batch at once; the batch emits when all of them have resolved.
const runBatch = batch => from(Promise.all(batch.map(item => download(item))))

// Pair a batch's results with the largest delay found among them.
const withSlowest = list => ({
  list,
  max: list.reduce((slowest, result) => Math.max(slowest, result.time), -Infinity),
})

// 16 values -> buffers of 4 -> batches run one after another (concatMap) -> collected into one array.
const batches$ = range(1, 16).pipe(
  bufferCount(4),
  concatMap(runBatch),
  map(withSlowest),
  tap(group => console.log('tap', group)),
  toArray(),
)

batches$.subscribe(groups => {
  const elapsed = Date.now() - st
  console.log('time', elapsed)
  // Sum of each batch's slowest delay.
  const sum = groups.reduce((total, group) => total + group.max, 0)
  console.log('sum', sum)
})

 

一次执行结果分析

接口返回并非按照顺序, 因为接口完成时间是随机的

最后用 toArray 将结果收集为数组, 并对每组的最长时间求和, 可以看到与计时器算出的时间是比较吻合的

down 2
down 3
down 1
down 4
tap {
  list: [
    { done: true, val: 1, time: 2220.1364535533085 },
    { done: true, val: 2, time: 632.042034213453 },
    { done: true, val: 3, time: 1919.264377120481 },
    { done: true, val: 4, time: 2622.156246436889 }
  ],
  max: 2622.156246436889
}
down 7
down 8
down 6
down 5
tap {
  list: [
    { done: true, val: 5, time: 1921.398594902853 },
    { done: true, val: 6, time: 1710.466478612874 },
    { done: true, val: 7, time: 125.57895954206488 },
    { done: true, val: 8, time: 753.0369241776438 }
  ],
  max: 1921.398594902853
}
down 10
down 12
down 9
down 11
tap {
  list: [
    { done: true, val: 9, time: 1845.7874685388945 },
    { done: true, val: 10, time: 1054.4201790714242 },
    { done: true, val: 11, time: 2598.3138444275805 },
    { done: true, val: 12, time: 1506.6078968696627 }
  ],
  max: 2598.3138444275805
}
down 13
down 14
down 15
down 16
tap {
  list: [
    { done: true, val: 13, time: 1885.195610068538 },
    { done: true, val: 14, time: 2360.3281655495925 },
    { done: true, val: 15, time: 2770.8347520387047 },
    { done: true, val: 16, time: 2868.1715082265905 }
  ],
  max: 2868.1715082265905
}
time 10017
sum 10010.040193993913

Process finished with exit code 0

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!