HTML Tag Parsing

前端 未结 3 2048
日久生厌
日久生厌 2020-12-05 01:02

How can I parse Name: & Value text from within the tag with DIHtmlParser? I tried doing it with TCLHtmlParser from Clever Components but it failed. Seco

3条回答
  •  春和景丽
    2020-12-05 01:47

    You could use the MSHTML DOM (the IHTMLDocument2 interface) to parse whatever elements you need from the HTML:

    uses ActiveX, MSHTML;
    
    const
      // Sample markup used by the handlers below.
      // NOTE(review): the tags were stripped when this page was extracted; the
      // markup is reconstructed from the class names the code checks
      // ('tvLabel', 'tvValue', 'tvRow tvFirst hasLabel tvFirst'), the surviving
      // 'Value' text and the 'Name:' text from the question. The 'clear' filler
      // div is a guess; confirm against the original post.
      // Each piece is a single-line literal because a Delphi string literal
      // cannot span a line break.
      HTML =
        '<div class="tvRow tvFirst hasLabel tvFirst">' +
        '<label class="tvLabel">Name:</label>' +
        '<span class="tvValue">Value</span>' +
        '<div class="clear"></div>' +
        '</div>';

    // Loads the HTML constant into an MSHTML document, shows the body's
    // innerHTML, then shows the inner text of every LABEL element with class
    // 'tvLabel' and every SPAN element with class 'tvValue'.
    //
    // Sender: the control that raised the click event (unused).
    procedure TForm1.Button1Click(Sender: TObject);
    var
      doc: OleVariant;       // late-bound document object
      elements: OleVariant;  // doc.body.all, fetched once instead of per iteration
      el: OleVariant;
      i: Integer;
    begin
      // Holding the document in an OleVariant makes every member access below
      // a late-bound call, so a plain string can be passed to write().
      doc := coHTMLDocument.Create as IHTMLDocument2;
      doc.write(HTML);
      doc.close;
      ShowMessage(doc.body.innerHTML);

      elements := doc.body.all;
      for i := 0 to elements.length - 1 do
      begin
        el := elements.item(i);
        // The tag names are compared in upper case, as in the original code.
        if (el.tagName = 'LABEL') and (el.className = 'tvLabel') then
          ShowMessage(el.innerText);
        if (el.tagName = 'SPAN') and (el.className = 'tvValue') then
          ShowMessage(el.innerText);
      end;
    end;

    I also want to mention another very nice HTML parser I found today: htmlp (Delphi Dom HTML Parser and Converter). It is obviously not as flexible as IHTMLDocument2, but it is very easy to work with, fast, free, and supports Unicode in older Delphi versions.

    Sample usage:

    uses HtmlParser, DomCore;
    
    // Returns the first direct child of the document element whose nodeName is
    // 'body', cast to TElement, or nil when no such child exists.
    function GetDocBody(HtmlDoc: TDocument): TElement;
    var
      idx: Integer;
      candidate: TNode;
    begin
      Result := nil;
      for idx := 0 to HtmlDoc.documentElement.childNodes.length - 1 do
      begin
        candidate := HtmlDoc.documentElement.childNodes.item(idx);
        // Skip everything that is not the body node.
        if candidate.nodeName <> 'body' then
          Continue;
        Result := candidate as TElement;
        Exit; // first match wins
      end;
    end;
    
    // Parses the HTML constant with THtmlParser, locates the body element via
    // GetDocBody and, for each direct child <div> of the body whose class
    // attribute is exactly 'tvRow tvFirst hasLabel tvFirst', shows the number
    // of child nodes that div has.
    //
    // Sender: the control that raised the click event (unused).
    procedure THTMLForm.Button2Click(Sender: TObject);
    const
      RowClass = 'tvRow tvFirst hasLabel tvFirst';
    var
      parser: THtmlParser;
      parsedDoc: TDocument;
      idx: Integer;
      bodyElem, child: TElement;
      current: TNode;
    begin
      parser := THtmlParser.Create;
      try
        parsedDoc := parser.parseString(HTML);
        try
          bodyElem := GetDocBody(parsedDoc);
          // No body element: nothing to inspect (the finally blocks still run).
          if not Assigned(bodyElem) then
            Exit;

          for idx := 0 to bodyElem.childNodes.length - 1 do
          begin
            current := bodyElem.childNodes.item(idx);
            // Only element nodes are inspected; other node kinds are skipped.
            if not (current is TElement) then
              Continue;

            child := TElement(current);
            if child.tagName <> 'div' then
              Continue;
            if child.GetAttribute('class') <> RowClass then
              Continue;

            // iterate child.childNodes here...
            ShowMessage(IntToStr(child.childNodes.length));
          end;
        finally
          parsedDoc.Free;
        end;
      finally
        parser.Free;
      end;
    end;
    

提交回复
热议问题