使用rxjs模拟爬虫
首先确定一个接口方法, 该方法随机[0,3]s 完成接口请求
源数据默认16条, 每组4个, 分为4组
每组内部使用 Promise.all 并发请求, 组与组之间通过 concatMap 串行执行, 因此总用时约等于四组中每组最长时间之和
const {range, from, of} = require('rxjs')
const {concatMap, reduce, tap, toArray, map, scan, finalize, bufferCount} = require('rxjs/operators')
const fs = require('fs')
/**
 * Simulates one API request: after a random delay it writes `val` to
 * `./tmp/<val>.txt` and resolves with a small result record.
 *
 * @param {*} val - Payload of the fake request; also used as the file name.
 * @param {{maxDelay?: number}} [options] - `maxDelay` is the exclusive upper
 *   bound of the random delay in milliseconds (defaults to 3000).
 * @returns {Promise<{done: boolean, val: *, time: number}>} Resolves with the
 *   delay that was used; rejects if the file cannot be written.
 */
function download(val, {maxDelay = 3000} = {}) {
    const time = Math.random() * maxDelay
    return new Promise((resolve, reject) => {
        setTimeout(() => {
            console.log('down', val)
            try {
                // The target directory is not guaranteed to exist on a fresh checkout.
                fs.mkdirSync('./tmp', {recursive: true})
                // Current Node.js only accepts string/Buffer-like data here, so a
                // numeric `val` has to be stringified explicitly.
                fs.writeFileSync(`./tmp/${val}.txt`, String(val))
                resolve({done: true, val, time})
            } catch (err) {
                // An exception thrown inside a timer callback would otherwise be
                // uncaught and leave this promise pending forever.
                reject(err)
            }
        }, time)
    })
}
// Wall-clock start; compared at the end against the sum of per-group maxima.
const st = Date.now()
range(1, 16).pipe(
    // Split the 16 ids into groups of 4.
    bufferCount(4),
    // concatMap starts the next group only after the current group's
    // Promise.all has settled, so groups run one after another while the
    // requests inside a group run concurrently.
    concatMap(items => from(Promise.all(items.map(x => download(x))))),
    // Attach the slowest request time of each group.
    map(list => ({list, max: Math.max(...list.map(x => x.time))})),
    tap(x => console.log('tap', x)),
    // Collect all groups into a single array emitted on completion.
    toArray(),
).subscribe({
    next: list => {
        console.log('time', Date.now() - st)
        const sum = list.reduce((acc, group) => acc + group.max, 0)
        console.log('sum', sum)
    },
    // Without an error handler a failed group would surface as an unhandled
    // error with no context; report it and signal failure via the exit code.
    error: err => {
        console.error('crawl failed', err)
        process.exitCode = 1
    },
})
一次执行结果分析
接口返回并非按照顺序, 因为接口完成时间是随机的
最后用 toArray 将结果汇总为数组, 并对每组的最长时间求和, 可以看到与计时器算出的时间是比较吻合的
down 2
down 3
down 1
down 4
tap {
list: [
{ done: true, val: 1, time: 2220.1364535533085 },
{ done: true, val: 2, time: 632.042034213453 },
{ done: true, val: 3, time: 1919.264377120481 },
{ done: true, val: 4, time: 2622.156246436889 }
],
max: 2622.156246436889
}
down 7
down 8
down 6
down 5
tap {
list: [
{ done: true, val: 5, time: 1921.398594902853 },
{ done: true, val: 6, time: 1710.466478612874 },
{ done: true, val: 7, time: 125.57895954206488 },
{ done: true, val: 8, time: 753.0369241776438 }
],
max: 1921.398594902853
}
down 10
down 12
down 9
down 11
tap {
list: [
{ done: true, val: 9, time: 1845.7874685388945 },
{ done: true, val: 10, time: 1054.4201790714242 },
{ done: true, val: 11, time: 2598.3138444275805 },
{ done: true, val: 12, time: 1506.6078968696627 }
],
max: 2598.3138444275805
}
down 13
down 14
down 15
down 16
tap {
list: [
{ done: true, val: 13, time: 1885.195610068538 },
{ done: true, val: 14, time: 2360.3281655495925 },
{ done: true, val: 15, time: 2770.8347520387047 },
{ done: true, val: 16, time: 2868.1715082265905 }
],
max: 2868.1715082265905
}
time 10017
sum 10010.040193993913
Process finished with exit code 0
来源:oschina
链接:https://my.oschina.net/ahaoboy/blog/3171015