Using Java, how can I extract all the links from a given web page?
import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Fetches a web page and prints the value of every {@code href} attribute
 * found in it to standard output, one link per line.
 *
 * <p>Links are located with a regular expression applied line by line, not
 * with a full HTML parser. An attribute whose name and value are split across
 * two lines is therefore not detected, and relative links are printed exactly
 * as written in the page (they are not resolved against the page URL).
 */
public class NameOfProgram {

    /** Page that is fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://www.stackoverflow.com";

    /**
     * Matches an {@code href} attribute and captures its value: double-quoted
     * (group 1), single-quoted (group 2) or unquoted (group 3). The lookbehind
     * keeps attributes that merely end in "href" (e.g. {@code data-href}) from
     * matching.
     */
    private static final Pattern HREF = Pattern.compile(
            "(?<![\\w-])href\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))",
            Pattern.CASE_INSENSITIVE);

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the URL of the page to scan.
     *             When absent, {@value #DEFAULT_URL} is used.
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        try {
            URL url = new URL(address);
            // try-with-resources closes the reader (and the stream it wraps)
            // on every path. The charset is explicit so decoding does not
            // depend on the platform default.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    printLinks(line);
                }
            }
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL '" + address + "': " + e.getMessage());
        } catch (IOException e) {
            System.err.println("Could not read '" + address + "': " + e);
        }
    }

    /** Prints each non-empty href value found in {@code line} on its own line. */
    private static void printLinks(String line) {
        Matcher matcher = HREF.matcher(line);
        while (matcher.find()) {
            // Exactly one of the three alternatives captured the value.
            String link = matcher.group(1);
            if (link == null) {
                link = matcher.group(2);
            }
            if (link == null) {
                link = matcher.group(3);
            }
            link = link.trim();
            if (!link.isEmpty()) {
                System.out.println(link);
            }
        }
    }
}