I am working on a WebCrawler implementation but am facing a strange memory leak in ASP.NET Web API's HttpClient.
Here is the cut-down version:
I'm no good at diagnosing memory issues, but I gave it a try with the following code. It targets .NET 4.5 and uses the async/await feature of C#, too. It seems to keep memory usage around 10 - 15 MB for the entire process (I'm not sure whether you would consider this better memory usage, though). However, if you watch the # Gen 0 Collections, # Gen 1 Collections and # Gen 2 Collections perf counters, they are pretty high with the code below.
If you remove the GC.Collect calls below, memory usage goes back and forth between 30 MB and 50 MB for the entire process. The interesting part is that when I run your code on my 4-core machine, I don't see abnormal memory usage by the process either. I have .NET 4.5 installed on my machine; if you don't, the problem might be related to the CLR internals of .NET 4.0. I am sure that the TPL has improved a lot in .NET 4.5 in terms of resource usage.
/// <summary>
/// Console sample that downloads every URI listed in a text file, keeping at most
/// <see cref="Environment.ProcessorCount"/> requests in flight over one shared
/// <see cref="HttpClient"/>.
/// </summary>
class Program {
    /// <summary>
    /// Starts the crawl and keeps the process alive until the user presses Enter.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args) {
        // Allow many simultaneous connections so the crawl is not throttled by the default limit.
        ServicePointManager.DefaultConnectionLimit = 500;
        CrawlAsync().ContinueWith(task => {
            // Surface a failed crawl (e.g. the links file is missing) instead of hiding it.
            if (task.IsFaulted) {
                Console.WriteLine("***FAILED: {0}", task.Exception.GetBaseException().Message);
            }
            Console.WriteLine("***DONE!");
        });
        Console.ReadLine();
    }

    /// <summary>
    /// Reads the link list and issues a GET for each entry, starting a new request
    /// each time a running one finishes.
    /// </summary>
    /// <returns>A task that completes once every request has finished.</returns>
    private static async Task CrawlAsync() {
        int numberOfCores = Environment.ProcessorCount;
        // A queue gives O(1) removal from the front of the pending list.
        Queue<string> requestUris = new Queue<string>(
            File.ReadAllLines(@"C:\Users\Tugberk\Downloads\links.txt"));
        // Keyed by the task itself: Task.Id is not guaranteed to be unique.
        // Only this method touches the map, one step at a time, so a plain Dictionary suffices.
        Dictionary<Task, HttpRequestMessage> inFlight = new Dictionary<Task, HttpRequestMessage>();

        // The using block disposes the client even when the crawl throws.
        using (var httpClient = new HttpClient()) {
            // The file may contain fewer links than there are cores.
            int initialBatch = Math.Min(numberOfCores, requestUris.Count);
            for (int i = 0; i < initialBatch; i++) {
                StartRequest(httpClient, requestUris.Dequeue(), inFlight);
            }

            while (inFlight.Count > 0) {
                Task finished = await Task.WhenAny(inFlight.Keys);
                HttpRequestMessage finishedRequest = inFlight[finished];
                inFlight.Remove(finished);

                // Observe failures so they are reported rather than silently dropped.
                if (finished.IsFaulted) {
                    Console.WriteLine("**Request failed for {0}! Error: {1}",
                        finishedRequest.RequestUri, finished.Exception.GetBaseException().Message);
                }
                else if (finished.IsCanceled) {
                    Console.WriteLine("**Request was canceled for {0}!", finishedRequest.RequestUri);
                }

                finished.Dispose();
                finishedRequest.Dispose();

                if (requestUris.Count > 0) {
                    StartRequest(httpClient, requestUris.Dequeue(), inFlight);
                }

                // Forced collections are part of the memory experiment this sample
                // demonstrates; do not keep them in production code.
                GC.Collect(0);
                GC.Collect(1);
                GC.Collect(2);
            }
        }
    }

    /// <summary>
    /// Creates a GET request for <paramref name="requestUri"/>, starts it and records it as in flight.
    /// </summary>
    /// <param name="httpClient">Shared client used to send the request.</param>
    /// <param name="requestUri">Address to download.</param>
    /// <param name="inFlight">Map of running request tasks to the messages they own.</param>
    private static void StartRequest(HttpClient httpClient, string requestUri,
        Dictionary<Task, HttpRequestMessage> inFlight) {
        var requestMessage = new HttpRequestMessage(HttpMethod.Get, requestUri);
        Task task = MakeCall(httpClient, requestMessage);
        inFlight.Add(task, requestMessage);
    }

    /// <summary>
    /// Sends <paramref name="requestMessage"/> and, on a success status code, reads the body
    /// as a string, logging progress to the console.
    /// </summary>
    /// <param name="httpClient">Client used to send the request.</param>
    /// <param name="requestMessage">Request to send; the caller disposes it.</param>
    /// <returns>A task that completes when the response has been processed and disposed.</returns>
    private static async Task MakeCall(HttpClient httpClient, HttpRequestMessage requestMessage) {
        Console.WriteLine("**Starting new request for {0}!", requestMessage.RequestUri);
        // Disposing the response also disposes its Content, so no separate content cleanup is needed.
        using (var response = await httpClient.SendAsync(requestMessage).ConfigureAwait(false)) {
            Console.WriteLine("**Request is completed for {0}! Status Code: {1}", requestMessage.RequestUri, response.StatusCode);
            if (response.IsSuccessStatusCode) {
                Console.WriteLine("**Getting the HTML for {0}!", requestMessage.RequestUri);
                string html = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
                Console.WriteLine("**Got the HTML for {0}! Legth: {1}", requestMessage.RequestUri, html.Length);
            }
        }
    }
}