纯文本 空格 字符 回车 替换

Java 字符串:获取网页纯文本,并替换空格与回车

痞子三分冷 提交于 2019-12-03 02:22:41
package cn.com.czj.front.utils.http; import java.io.*; import org.apache.commons.lang3.StringUtils; import org.htmlparser.Parser; import org.htmlparser.beans.StringBean; import org.htmlparser.util.ParserException; import org.htmlparser.visitors.HtmlPage; class GetPureWord { public static void main(String[] argv) throws IOException, InterruptedException, ParserException { Parser parser; String body = ""; String title = ""; String url = "http://www.linweikun.com/"; try { parser = new Parser(url); parser.setEncoding("UTF-8"); HtmlPage htmlpage = new HtmlPage(parser); parser.visitAllNodesWith