<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.12</version>
</dependency>
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.13.1</version>
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.7</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.10</version>
</dependency>
package com.download.util;

import org.apache.commons.io.FileUtils;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.UUID;

/**
 * Downloads the videos and preview images referenced by the listing pages of
 * {@code https://ibaotu.com/shipin/}.
 *
 * <p>Each listing page is fetched with Apache HttpClient, parsed with jsoup, and every
 * matching media element is copied to a randomly named file below the save path.
 *
 * @author Administrator
 */
public class DownloadVideoAndImage {

    /** Listing page URL is {@code PAGE_URL_PREFIX + pageNumber + PAGE_URL_SUFFIX}. */
    private static final String PAGE_URL_PREFIX = "https://ibaotu.com/shipin/7-0-0-0-0-";
    private static final String PAGE_URL_SUFFIX = ".html";

    /** CSS selector matching the video elements of a listing page. */
    private static final String VIDEO_SELECTOR = "div.media-list div.video-play video";
    /** CSS selector matching the preview image elements of a listing page. */
    private static final String IMAGE_SELECTOR = "div.media-list div.show-image>img ";

    /** HTTP status code of a successful response. */
    private static final int HTTP_OK = 200;
    /** Placeholder returned by {@link #getHtmlByURL} when the status code is not 200. */
    private static final String NETWORK_ERROR_MESSAGE = "网络错误,请重试";

    /** Timeouts for media downloads, so that a stalled server cannot block a thread forever. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;
    private static final int READ_TIMEOUT_MS = 60_000;

    /**
     * Starts three download threads: videos for the configured page range, videos for every
     * page from the start page onwards, and images for every page from the start page onwards.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // First page to download.
        int startPage = 1;
        // Last page to download (inclusive).
        int endPage = 5;
        // Target directory.
        String savePath = "D:" + File.separator + "Desktop" + File.separator + "download";
        // Charset used to decode the listing pages.
        String charset = "utf-8";

        Thread rangeVideoThread = new Thread(() -> {
            // Honour the configured range: start at startPage and include endPage.
            for (int page = startPage; page <= endPage; page++) {
                System.out.println("正在下载第" + page + "页video。");
                try {
                    downloadVideoByURL(pageUrl(page), savePath, charset);
                    // Only report completion when the page really finished.
                    System.out.println("第" + page + "页video下载完成");
                } catch (Exception e) {
                    // Thread boundary: report the failure and go on with the next page.
                    e.printStackTrace();
                }
            }
        });

        Thread allVideosThread = new Thread(() -> {
            try {
                downloadVideoByPage(startPage, savePath, charset);
            } catch (Exception e) {
                e.printStackTrace();
            }
        });

        Thread allImagesThread = new Thread(() -> {
            try {
                downloadImagePage(startPage, savePath, charset);
            } catch (Exception e) {
                e.printStackTrace();
            }
        });

        rangeVideoThread.start();
        allVideosThread.start();
        allImagesThread.start();
    }

    /**
     * Downloads every video referenced by a single listing page into the
     * {@code downloadVideoByURL} sub-folder of {@code savePath}.
     *
     * @param url      address of the listing page
     * @param savePath directory the files are saved below
     * @param charset  charset used to decode the page
     * @throws IOException if the page or one of the videos cannot be fetched or written
     */
    public static void downloadVideoByURL(String url, String savePath, String charset) throws IOException {
        Elements videos = selectFromPage(url, charset, VIDEO_SELECTOR);
        createSaveFileFolder(savePath);
        downloadMedia(videos, "src", savePath, "downloadVideoByURL", ".mp4");
        System.out.println("video下载完毕");
    }

    /**
     * Downloads the videos of listing page {@code page} and of every following page into the
     * {@code downloadVideoByPage} sub-folder of {@code savePath}.
     *
     * <p>Pages are processed in a loop rather than recursively, and the loop stops at the first
     * page that contains no video element (for example a page past the end of the listing, or a
     * page that could not be loaded), so the method always terminates.
     *
     * @param page     number of the first page to download
     * @param savePath directory the files are saved below
     * @param charset  charset used to decode the pages
     * @throws Exception if a page or one of the videos cannot be fetched or written
     */
    public static void downloadVideoByPage(int page, String savePath, String charset) throws Exception {
        createSaveFileFolder(savePath);
        for (int current = page; ; current++) {
            System.out.println("downloadVideoByPage开始下载第" + current + "页video。");
            Elements videos = selectFromPage(pageUrl(current), charset, VIDEO_SELECTOR);
            if (videos.isEmpty()) {
                System.out.println("downloadVideoByPage第" + current + "页没有video,停止下载。");
                return;
            }
            downloadMedia(videos, "src", savePath, "downloadVideoByPage", ".mp4");
            System.out.println("downloadVideoByPage第" + current + "页video下载完毕。");
        }
    }

    /**
     * Downloads the preview images of listing page {@code page} and of every following page
     * into the {@code downloadImagePage} sub-folder of {@code savePath}.
     *
     * <p>Pages are processed in a loop rather than recursively, and the loop stops at the first
     * page that contains no image element, so the method always terminates.
     *
     * @param page     number of the first page to download
     * @param savePath directory the files are saved below
     * @param charset  charset used to decode the pages
     * @throws Exception if a page or one of the images cannot be fetched or written
     */
    public static void downloadImagePage(int page, String savePath, String charset) throws Exception {
        createSaveFileFolder(savePath);
        for (int current = page; ; current++) {
            System.out.println("downloadImagePage正在下载第" + current + "页image。");
            Elements images = selectFromPage(pageUrl(current), charset, IMAGE_SELECTOR);
            if (images.isEmpty()) {
                System.out.println("downloadImagePage第" + current + "页没有image,停止下载。");
                return;
            }
            downloadMedia(images, "data-url", savePath, "downloadImagePage", ".jpg");
            System.out.println("downloadImagePage第" + current + "页image下载完毕。");
        }
    }

    /**
     * Fetches the body of {@code url} as text.
     *
     * <p>The HTTP client and the response are closed before the method returns.
     *
     * @param url     address to fetch
     * @param charset charset used to decode the body when the response does not declare one
     * @return the response body when the status code is 200 (an empty string if the response
     *         has no body); otherwise the placeholder text {@code "网络错误,请重试"}
     * @throws IOException if the request fails or the body cannot be read
     */
    public static String getHtmlByURL(String url, String charset) throws IOException {
        HttpGet request = new HttpGet(url);
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse httpResponse = httpClient.execute(request)) {
            StatusLine statusLine = httpResponse.getStatusLine();
            if (statusLine.getStatusCode() != HTTP_OK) {
                return NETWORK_ERROR_MESSAGE;
            }
            HttpEntity entity = httpResponse.getEntity();
            return entity == null ? "" : EntityUtils.toString(entity, charset);
        }
    }

    /**
     * Parses an HTML string into a jsoup document.
     *
     * @param html HTML source
     * @return the parsed document
     */
    public static Document getDocumentByHtml(String html) {
        return Jsoup.parse(html);
    }

    /**
     * Creates the save directory, including missing parent directories, if it does not exist.
     *
     * <p>A failure is only reported on standard error; it does not raise an exception here.
     *
     * @param savePath directory to create
     */
    public static void createSaveFileFolder(String savePath) {
        File folder = new File(savePath);
        // mkdirs() also returns false when another thread created the directory in the
        // meantime, hence the final isDirectory() check before reporting a failure.
        if (!folder.exists() && !folder.mkdirs() && !folder.isDirectory()) {
            System.err.println("Could not create directory: " + folder.getAbsolutePath());
        }
    }

    /**
     * Generates a random UUID string without the {@code -} separators.
     *
     * @return 32 hexadecimal characters
     */
    public static String getUUIDString() {
        return UUID.randomUUID().toString().replace("-", "");
    }

    /** Builds the URL of listing page number {@code page}. */
    private static String pageUrl(int page) {
        return PAGE_URL_PREFIX + page + PAGE_URL_SUFFIX;
    }

    /** Fetches {@code url}, parses it and returns the elements matching {@code selector}. */
    private static Elements selectFromPage(String url, String charset, String selector) throws IOException {
        String html = getHtmlByURL(url, charset);
        return getDocumentByHtml(html).select(selector);
    }

    /**
     * Downloads the resource named by {@code attribute} of every element into
     * {@code savePath/folder}, using a random file name with the given extension.
     * Elements whose attribute is missing or blank are skipped.
     */
    private static void downloadMedia(Elements elements, String attribute, String savePath,
                                      String folder, String extension) throws IOException {
        for (Element element : elements) {
            String mediaUrl = normalizeMediaUrl(element.attr(attribute));
            if (mediaUrl == null) {
                continue;
            }
            System.out.println(mediaUrl);
            File target = new File(savePath, folder + File.separator + getUUIDString() + extension);
            FileUtils.copyURLToFile(new URL(mediaUrl), target, CONNECT_TIMEOUT_MS, READ_TIMEOUT_MS);
        }
    }

    /**
     * Turns an attribute value into a downloadable URL: values that already carry an
     * {@code http:} or {@code https:} scheme are used as they are, anything else (such as a
     * protocol-relative {@code //host/path}) gets {@code https:} prepended.
     *
     * @return the URL, or {@code null} when the value is missing or blank
     */
    private static String normalizeMediaUrl(String rawUrl) {
        if (rawUrl == null) {
            return null;
        }
        String trimmed = rawUrl.trim();
        if (trimmed.isEmpty()) {
            return null;
        }
        if (trimmed.startsWith("https:") || trimmed.startsWith("http:")) {
            return trimmed;
        }
        return "https:" + trimmed;
    }
}
来源:oschina
链接:https://my.oschina.net/u/4459974/blog/4310648