convert HTML DOM structure to JSON

后端 未结 5 1507
太阳男子
太阳男子 2020-12-05 21:27

I have wasted so much time on this... the recursion part is quite elusive.
For a given HTML structure of unknown depth, I need to convert it to JSON.
(I use this

相关标签:
5条回答
  • 2020-12-05 21:42

    I'm new here and I couldn't find how to post a comment. I wanted to ask whether this is always the structure, no matter the depth. If the answer is no, then don't read my answer :).

    So first of all I added a function getPrevious, because directly reading the previous sibling gives you a text node. Next I changed the recursion a little, because it's not a simple recursion: the JSON format (the parent-child relations) is different than the HTML format. I tried it for 2 more levels and it's OK. I hope it's helpful, and sorry if it's not.

        /**
         * Find the closest preceding sibling of `element` that is an element node.
         *
         * Walks backwards through `previousSibling`, skipping every node whose
         * nodeType is not 1 (text, comments, ...).
         *
         * @param {Node} element - node whose preceding element sibling is wanted.
         * @returns {?Node} the nearest preceding element sibling, or null when
         *     there is none.
         */
        function getPrevious(element)
        {
            var prev_el = element.previousSibling;
            // Stop on null so a node without any preceding element yields null
            // instead of throwing on `null.nodeType`.
            while (prev_el && prev_el.nodeType !== 1)
            {
                prev_el = prev_el.previousSibling;
            }
            return prev_el;
        }
    
        /**
         * Convert a DOM subtree into a nested plain object.
         *
         * Three kinds of element are distinguished:
         *  - a SPAN whose class is exactly "title": reports its innerHTML as
         *    `title`, so the caller can nest its sibling results under that key;
         *  - an INPUT: yields `{ <innerHTML of the element found by getPrevious>: value }`;
         *  - anything else: recurses into `childNodes` and deep-merges the
         *    children's results (via `$.extend(true, ...)`).
         *
         * @param {Node} element - node to convert.
         * @returns {{title: string, json: Object}|undefined} `undefined` for
         *     non-element nodes; otherwise `title` is non-empty only for a
         *     title SPAN and `json` holds the data collected for this subtree.
         */
        function recursive(element){
            // Only element nodes have the className/childNodes shape used below;
            // text and comment nodes contribute nothing.
            if( element.nodeType !== 1 )
                return;

            var Result = {"title": '', "json": {}};
            var json = {};
            var nodeName = element.nodeName.toLowerCase();
            var nodeClass = element.className.toLowerCase();

            if( nodeName === 'span' && nodeClass === 'title' ){
                // Title SPAN: its innerHTML becomes the key under which the
                // parent nests everything gathered from the sibling nodes.
                json[element.innerHTML] = {};
                Result.title = element.innerHTML;
                Result.json = json;
            }
            else if( nodeName === 'input' ){
                // INPUT field: the element located by getPrevious supplies the key.
                var key = getPrevious(element).innerHTML;
                Result.json[key] = element.value;
            }
            else {
                var title_found = '';
                // go deeper
                for( var child = 0; child < element.childNodes.length; child++ ){
                    var res = recursive( element.childNodes[child] );
                    if (!res)
                        continue;
                    if (res.title !== '')
                        title_found = res.title;
                    else
                        $.extend(true, json, res.json);
                }
                // A title among the children wraps the merged data under that
                // title; otherwise the merged data is passed up as-is.
                if (title_found)
                    Result.json[title_found] = json;
                else
                    Result.json = json;
            }
            return Result;
        }
    
    0 讨论(0)
  • 2020-12-05 21:42

    Live Example

    // Root of the conversion: the first element child of <body>
    // (presumably the outermost <ul> of the markup being converted).
    var ul = document.body.firstElementChild;
    // extractKeyValue returns a [key, value] pair. An empty object is passed as
    // the "span", so the key at index 0 is undefined; only the value at index 1
    // is kept. The call works before the definition because function
    // declarations are hoisted.
    var data = extractKeyValue({}, ul)[1];
    
    
    /**
     * Build a [key, value] pair for one label/value pairing.
     *
     * @param {Object} span - node whose `textContent` is used as the key.
     * @param {Object} thing - either an INPUT (its `value` is the value) or a
     *     container whose `children` each start with a label element followed
     *     by that label's INPUT or nested container.
     * @returns {Array} two-item array: `[span.textContent, value]`, where the
     *     value is a string for an INPUT and a nested object otherwise.
     */
    function extractKeyValue(span, thing) {
      // Leaf: an input contributes its current value directly.
      if (thing.tagName === "INPUT") {
        return [span.textContent, thing.value];
      }

      // Container: every child item yields one property of the nested object.
      var collected = {};
      var items = thing.children;
      for (var i = 0; i < items.length; i++) {
        var labelEl = items[i].firstElementChild;
        var valueEl = labelEl.nextElementSibling;
        var pair = extractKeyValue(labelEl, valueEl);
        collected[pair[0]] = pair[1];
      }
      return [span.textContent, collected];
    }
    
    0 讨论(0)
  • 2020-12-05 21:44

    If you can convince yourself to use jQuery, try this:

    /**
     * Convert the `ul > li > span` structure directly below `root` into a
     * nested object (requires jQuery).
     *
     * Each span's text becomes a key. A span with class "title" maps to the
     * object built recursively from its parent <li>; any other span maps to
     * the value of the <input> immediately following it.
     *
     * @param {string|Element|jQuery} root - anything accepted by `$()`.
     * @returns {Object} the nested key/value object.
     */
    function helper(root) {
      var result = {};

      // A .children() chain selects the same nodes as '> ul > li > span'
      // scoped to root, without relying on a leading child combinator in a
      // context selector, which jQuery discourages.
      $(root).children('ul').children('li').children('span').each(function () {
        var $span = $(this);
        result[$span.text()] = $span.hasClass('title')
          ? helper($span.parent())
          : $span.next('input').val();
      });

      return result;
    }
    
    // Convert starting from <body> (given as a selector string) and log the resulting object.
    console.log(helper('body'));
    
    0 讨论(0)
  • 2020-12-05 21:47

    Try this:

    /**
     * Convert the `ul > li > span` structure directly below `root` into a
     * nested object using only DOM APIs.
     *
     * Each matched span's `innerText` becomes a key. A span whose classList
     * contains "title" maps to the object built recursively from its parent
     * node; any other span maps to the value of the first <input> found
     * inside its parent node.
     *
     * @param {Element} root - element whose direct `ul > li > span` descendants are read.
     * @returns {Object} the nested key/value object.
     */
    function helper(root) {
      var labels = root.querySelectorAll(':scope > ul > li > span');
      var result = {};

      for (var i = 0; i < labels.length; i++) {
        var label = labels[i];
        var key = label.innerText;
        if (label.classList.contains('title')) {
          // Title span: descend into the same <li> for the nested list.
          result[key] = helper(label.parentNode);
        } else {
          // Plain label: take the value of the input inside the same parent.
          result[key] = label.parentNode.querySelector('input').value;
        }
      }

      return result;
    }
    
    // Convert starting from the <body> element and log the resulting object.
    console.log(helper(document.querySelector('body')));
    
    0 讨论(0)
  • 2020-12-05 21:58
    <section id="in">
        <ul>
            <li><div>lorem</div></li>
            <li>
                <div>lorem</div>
                <ul>
                    <li><div>lorem</div></li>
                    <li>
                        <div>lorem</div>
                    </li>
                    <li>
                        <div>lorem</div>
                        <ul>
                            <li><div>lorem</div></li>
                            <li>
                                <div>lorem</div>
                            </li>
                            <li><div>lorem</div></li>
                            <li><div>lorem</div></li>
                        </ul>
                    </li>
                    <li><div>lorem</div></li>
                </ul>
            </li>
            <li><div>lorem</div></li>
            <li><div>lorem</div></li>
        </ul>
    </section>
    
    <textarea id="outjson"></textarea>
    
        // Collect the element tree below #in into `a`, then show it as JSON.
        var a = [];
        getJSON($('#in'), a);

        /**
         * Append one `{ TAGNAME: [...] }` entry to `arr` for every child
         * element of `el`, recursing so each entry's array describes that
         * child's own children (requires jQuery).
         *
         * @param {jQuery} el - jQuery object whose children are described.
         * @param {Array} arr - array that receives the entries (mutated in place).
         */
        function getJSON(el, arr)
        {
            el.children().each(function()
            {
                var nested = [];
                var entry = {};
                entry[this.tagName] = nested;
                arr.push(entry);
                // Recursing into a childless element appends nothing, so no
                // separate emptiness check is needed.
                getJSON($(this), nested);
            });
        }
        // #outjson is a <textarea>: form-control content is set with .val(),
        // not .text().
        $('#outjson').val(JSON.stringify(a));
    

    You will get:

    [{"UL":[{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]},{"UL":[{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]},{"UL":[{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]}]}]}]},{"LI":[{"DIV":[]}]}]}]},{"LI":[{"DIV":[]}]},{"LI":[{"DIV":[]}]}]}]

    0 讨论(0)
提交回复
热议问题