I\'m using a WebClient
object to send Http Post request to a server.
It\'s sending a huge amount of requests quite rapidly (there is about 4000 messages in a
I hope I'm not late for the party. Anyway, limiting the rate of the request is just one of the problem I faced a week ago as I was creating a crawler. Here are the issues:
Here's the solution:
private Flux sequentialCrawl() {
AtomicLong pageNo = new AtomicLong(2);
// Solution for #1 - Flux.expand
return getHosts(1)
.doOnRequest(value -> LOGGER.info("Start crawling."))
.expand(hostListResponse -> {
final long totalPages = hostListResponse.getData().getTotalPages();
long currPageNo = pageNo.getAndIncrement();
if (currPageNo <= totalPages) {
LOGGER.info("Crawling page " + currPageNo + " of " + totalPages);
// Solution for #2
return Mono.just(1).delayElement(Duration.ofSeconds(1)).then(
getHosts(currPageNo)
);
}
return Flux.empty();
})
.doOnComplete(() -> LOGGER.info("End of crawling."));
}
private Mono getHosts(long pageNo) {
final String uri = hostListUrl + pageNo;
LOGGER.info("Crawling " + uri);
return webClient.get()
.uri(uri)
.exchange()
// Solution for #3
.retryWhen(companion -> companion
.zipWith(Flux.range(1, RETRY + 1), (error, index) -> {
String message = "Failed to crawl uri: " + error.getMessage();
if (index <= RETRY && (error instanceof RequestIntervalTooShortException
|| error instanceof ConnectTimeoutException
|| "Connection reset by peer".equals(error.getMessage())
)) {
LOGGER.info(message + ". Retries count: " + index);
return Tuples.of(error, index);
} else {
LOGGER.warn(message);
throw Exceptions.propagate(error); //terminate the source with the 4th `onError`
}
})
.map(tuple -> {
// Solution for #4
Throwable e = tuple.getT1();
int delaySeconds = tuple.getT2();
// TODO: Adjust these values according to your needs
if (e instanceof ConnectTimeoutException) {
delaySeconds = delaySeconds * 5;
} else if ("Connection reset by peer".equals(e.getMessage())) {
// The API that this app is calling will sometimes think that the requests are SPAM. So let's rest longer before retrying the request.
delaySeconds = delaySeconds * 10;
}
LOGGER.info("Will retry crawling after " + delaySeconds + " seconds to " + uri + ".");
return Mono.delay(Duration.ofSeconds(delaySeconds));
})
.doOnNext(s -> LOGGER.warn("Request is too short - " + uri + ". Retried at " + LocalDateTime.now()))
)
.flatMap(clientResponse -> clientResponse.toEntity(String.class))
.map(responseEntity -> {
HttpStatus statusCode = responseEntity.getStatusCode();
if (statusCode != HttpStatus.OK) {
Throwable exception;
// Convert json string to Java POJO
HostListResponse response = toHostListResponse(uri, statusCode, responseEntity.getBody());
// The API that I'm calling will return error code of 06 if request interval is too short
if (statusCode == HttpStatus.BAD_REQUEST && "06".equals(response.getError().getCode())) {
exception = new RequestIntervalTooShortException(uri);
} else {
exception = new IllegalStateException("Request to " + uri + " failed. Reason: " + responseEntity.getBody());
}
throw Exceptions.propagate(exception);
} else {
return toHostListResponse(uri, statusCode, responseEntity.getBody());
}
});
}