Element to string in HTMLDocument

前端 未结 1 1083
执笔经年
执笔经年 2020-12-04 03:01

I have an Element object that belongs to an HTMLDocument, and I want to get the string value of this element.

I want this result:

Christina Toth, Pharm. D.

===============

相关标签:
1条回答
  • 2020-12-04 03:29

    Try this instead.

    Edited to use the read() method of HTMLEditorKit.

    import java.io.StringReader;
    import javax.swing.text.AttributeSet;
    import javax.swing.text.Element;
    import javax.swing.text.ElementIterator;
    import javax.swing.text.StyleConstants;
    import javax.swing.text.html.HTML;
    import javax.swing.text.html.HTMLDocument;
    import javax.swing.text.html.HTMLEditorKit;
    
    /**
     * Demonstrates how to obtain the text of an {@link Element} that belongs to
     * an {@link HTMLDocument}.
     *
     * <p>A sample HTML page is parsed with {@link HTMLEditorKit}, every element of
     * the resulting document is visited, and for each {@code div} element the text
     * covered by its child elements is printed to standard output, prefixed with
     * the tag name.
     */
    public class NewMain {

        /** Sample page containing a single {@code div} whose text is to be extracted. */
        private static final String SAMPLE_HTML
            = "<html>\n"
            + "<head>\n"
            + "<title>pg_0001</title>\n"
            + "\n"
            + "<style type=\"text/css\">\n"
            + ".ft3{font-style:normal;font-weight:bold;font-size:11px;"
            + "font-family:Helvetica;color:#000000;}\n"
            + "</style>\n"
            + "</head>\n"
            + "<body vlink=\"#FFFFFF\" link=\"#FFFFFF\" bgcolor=\"#ffffff\">\n"
            + "\n"
            + "\n"
            + "<div style=\"position:absolute;top:597;left:252\"><nobr><span "
            + "class=\"ft3\">Christina Toth, Pharm. D.</span></nobr></div>\n"
            + "\n"
            + "\n"
            + "</body>\n"
            + "</html>";

        /**
         * Parses {@link #SAMPLE_HTML} and prints one line per {@code div} element in
         * the form {@code "div: <text>"}.
         *
         * @param args command-line arguments (unused)
         * @throws Exception if the HTML cannot be read or a text range is invalid
         */
        public static void main(String[] args) throws Exception {
            HTMLEditorKit htmlKit = new HTMLEditorKit();
            HTMLDocument htmlDoc = (HTMLDocument) htmlKit.createDefaultDocument();
            htmlKit.read(new StringReader(SAMPLE_HTML), htmlDoc, 0);

            // Walk every element of the parsed document and pick out the DIV tags.
            ElementIterator iterator = new ElementIterator(htmlDoc);
            Element element;
            while ((element = iterator.next()) != null) {
                AttributeSet attributes = element.getAttributes();
                // The tag an element represents is stored under NameAttribute.
                Object name = attributes.getAttribute(StyleConstants.NameAttribute);
                if (name == HTML.Tag.DIV) {
                    System.out.println(name + ": " + childText(htmlDoc, element));
                }
            }
        }

        /**
         * Concatenates the document text covered by each direct child of
         * {@code parent}.
         *
         * @param doc    the document that owns {@code parent}
         * @param parent the element whose children's text is collected
         * @return the text of all direct children, in document order; empty if
         *         {@code parent} has no children
         * @throws javax.swing.text.BadLocationException if a child's offsets do not
         *         denote a valid range of {@code doc}
         */
        private static String childText(HTMLDocument doc, Element parent)
                throws javax.swing.text.BadLocationException {
            // A method-local buffer needs no synchronization, so StringBuilder
            // is used rather than StringBuffer.
            StringBuilder text = new StringBuilder();
            int count = parent.getElementCount();
            for (int i = 0; i < count; i++) {
                Element child = parent.getElement(i);
                int startOffset = child.getStartOffset();
                int length = child.getEndOffset() - startOffset;
                text.append(doc.getText(startOffset, length));
            }
            return text.toString();
        }
    }
    

    Console:

    div: Christina Toth, Pharm. D.
    0 讨论(0)
提交回复
热议问题