问题
I have a big list of proxy servers (a text file with one ip:port entry per line) and wrote the code below to check them:
/// <summary>
/// Reads the proxy list (one <c>ip:port</c> entry per line) from the path held in
/// <c>txtProxyListPath.Text</c> and runs every entry through <c>CanPing</c>,
/// <c>SoketConnect</c> and <c>CheckProxy</c>, in that order.
/// </summary>
/// <remarks>
/// Entries that pass all three checks are collected without duplicates in
/// <c>list_lines_RemovedDup</c>. The collected list and the counters are locals;
/// this snippet does not show how they are reported to the caller.
/// </remarks>
public static void MyChecker()
{
    string[] lines = File.ReadAllLines(txtProxyListPath.Text);
    List<string> list_lines_RemovedDup = new List<string>();
    HashSet<string> HS = new HashSet<string>();
    int Duplicate_Count = 0;
    int badProxy = 0;
    int CheckedCount = 0;
    foreach (string line in lines)
    {
        // Blank lines are not proxy entries: skip them without counting.
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        // Every real entry is counted exactly once, whatever the outcome.
        CheckedCount++;

        string[] line_char = line.Trim().Split(':');
        if (line_char.Length < 2)
        {
            // No "ip:port" separator: indexing line_char[1] would throw.
            badProxy++;
            continue;
        }

        string ip = line_char[0];
        string port = line_char[1];

        // && short-circuits, so the checks run in the same order as before
        // and the later ones are skipped as soon as one fails.
        if (CanPing(ip) && SoketConnect(ip, port) && CheckProxy(ip, port))
        {
            string ipAndport = ip + ":" + port;
            if (HS.Add(ipAndport))
            {
                list_lines_RemovedDup.Add(ipAndport);
            }
            else
            {
                Duplicate_Count++;
            }
        }
        else
        {
            badProxy++;
        }
    }
}
/// <summary>
/// Sends a single ping to <paramref name="ip"/> with a 2000 ms timeout.
/// </summary>
/// <param name="ip">Host name or IP address to ping.</param>
/// <returns>
/// <c>true</c> when a reply arrives with status <c>IPStatus.Success</c>;
/// <c>false</c> when there is no reply, the status differs, or a
/// <c>PingException</c> is thrown.
/// </returns>
public static bool CanPing(string ip)
{
    // Ping is IDisposable; the using block releases it on every exit path.
    using (Ping ping = new Ping())
    {
        try
        {
            PingReply reply = ping.Send(ip, 2000);
            return reply != null && reply.Status == IPStatus.Success;
        }
        catch (PingException)
        {
            return false;
        }
    }
}
/// <summary>
/// Attempts a TCP connection to <paramref name="ip"/>:<paramref name="port"/>.
/// </summary>
/// <param name="ip">IP address in text form.</param>
/// <param name="port">Port number in text form.</param>
/// <returns>
/// <c>true</c> when the socket reports <c>Connected</c> after <c>Connect</c>;
/// <c>false</c> when the input cannot be parsed or any exception is thrown.
/// </returns>
public static bool SoketConnect(string ip, string port)
{
    // Reject unparsable input before allocating a socket or sleeping.
    IPAddress hip;
    int portNumber;
    if (!IPAddress.TryParse(ip, out hip) || !int.TryParse(port, out portNumber))
    {
        return false;
    }

    try
    {
        // The using block releases the socket even when Connect throws;
        // previously Close() was skipped on that path and the handle leaked.
        using (var connsock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp))
        {
            connsock.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.SendTimeout, 200);
            System.Threading.Thread.Sleep(500);
            // An out-of-range port makes the IPEndPoint constructor throw,
            // which the catch below turns into false.
            var ipep = new IPEndPoint(hip, portNumber);
            connsock.Connect(ipep);
            return connsock.Connected;
        }
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Downloads <c>http://SpecificWebSite.com</c> through the proxy at
/// <paramref name="ip"/>:<paramref name="port"/>.
/// </summary>
/// <param name="ip">Proxy host.</param>
/// <param name="port">Proxy port in text form.</param>
/// <returns>
/// <c>true</c> when the download completes; <c>false</c> when any exception
/// is thrown, including a port that is not a valid integer.
/// </returns>
public static bool CheckProxy(string ip, string port)
{
    try
    {
        // WebClient is IDisposable; the using block releases it on every path.
        using (WebClient WC = new WebClient())
        {
            WC.Proxy = new WebProxy(ip, int.Parse(port));
            WC.DownloadString("http://SpecificWebSite.com");
            return true;
        }
    }
    catch (Exception)
    {
        return false;
    }
}
But I think I should rewrite this code because it is very slow.
I get long delays on these lines: WC.DownloadString("http://SpecificWebSite.com");
and PingReply reply = ping.Send(ip, 2000);
and that is not acceptable for a big list.
Did I write this code in the right direction, or should I change it (and if so, which parts)?
How can I optimize it?
Thanks in advance.
回答1:
There are quite a few things you can improve.
- Don't sleep the thread for half a second.
- Drop the ping check (since the proxy might be behind a firewall and not responding to pings but still working)
- Replace DownloadString with an HttpWebRequest that fetches only the HEAD.
- Set the timeout of your HttpWebRequest to something lower than the default (there is no need to wait that long; if a proxy doesn't respond within 10-20 seconds, you probably don't want to use it).
- Split your big list into smaller ones and process them at the same time.
These alone should speed up your process by quite a bit.
As requested, here's an example of how to use HttpWebRequests
// Build a HEAD request for url, print the status code of the response,
// and dispose the response when done.
var request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "HEAD";
request.Timeout = 10000; // milliseconds
request.Proxy = null; // set proxy here
using (var response = (HttpWebResponse)request.GetResponse())
{
    HttpStatusCode status = response.StatusCode;
    Console.WriteLine(status);
}
回答2:
I might do something like this:
/// <summary>
/// Requests <c>api/ip</c> from <c>https://webapi.theproxisright.com/</c> through the
/// given proxy and compares the response body with the proxy address.
/// </summary>
/// <param name="ipAddress">Proxy host; also the value the response body is compared with.</param>
/// <param name="port">Proxy port.</param>
/// <param name="errorMsg">Message of the exception that caused a failure, or an empty string.</param>
/// <param name="connectionSeconds">Elapsed time of the attempt in seconds.</param>
/// <returns>
/// <c>true</c> when the response content equals <paramref name="ipAddress"/>;
/// <c>false</c> otherwise or when an exception is thrown.
/// </returns>
public static bool TestProxy(string ipAddress, int port, out string errorMsg, out double connectionSeconds) {
    errorMsg = "";
    connectionSeconds = -1;
    Stopwatch timer = new Stopwatch();
    try {
        timer.Start();
        var restClient = new RestClient("https://webapi.theproxisright.com/") {
            Proxy = new WebProxy(ipAddress, port)
        };
        var ipRequest = new RestRequest("api/ip", Method.GET) {
            Timeout = 10000,
            RequestFormat = DataFormat.Json
        };
        var reply = restClient.Execute(ipRequest);
        if (reply.ErrorException != null) {
            // Route transport errors to the catch block below.
            throw reply.ErrorException;
        }
        bool ipMatches = reply.Content == ipAddress;
        return ipMatches;
    } catch (Exception failure) {
        errorMsg = failure.Message;
        return false;
    } finally {
        // Runs on success and failure alike, so the timing is always reported.
        timer.Stop();
        connectionSeconds = timer.Elapsed.TotalSeconds;
    }
}
This uses a WhatIsMyIP-like REST service (I use one from https://TheProxIsRight.com).
Then, as suggested above, I might try to parallelize it with something like:
// Queue one proxy check as a task so that several checks can run at the same time.
Action probe = () => {
    try {
        string failureReason;
        double elapsedSeconds;
        bool isWorking = TestProxy("1.2.3.4", 3128, out failureReason, out elapsedSeconds);
        // Log the result here.
    } catch (Exception ex) {
        // Log the error here.
    }
};
Task.Factory.StartNew(probe);
Note, one can also use the REST API on the above site to query for working proxies: https://theproxisright.com/#apidemo
(Disclosure, I worked on the above site)
来源:https://stackoverflow.com/questions/12249702/what-is-the-fastest-way-for-checking-a-big-proxy-list-on-a-specific-web-site