How to append json with no duplicating (including CasperJS code)?

为君一笑 提交于 2019-12-12 02:47:31

问题


I'm using CasperJS to parse inner texts of webpages and save to json file.

Here is my code; the result (and the problem I have with it) is shown below.

var words = [];
var casper = require('casper').create();
var x = require('casper').selectXPath;
var fs = require('fs');

/**
 * Collects the innerHTML of every `span.inner_tit` element in the current document.
 *
 * @returns {string[]} One innerHTML string per matching element, in document order.
 */
function getWords() {
    var titleNodes = document.querySelectorAll('span.inner_tit');
    var texts = [];
    for (var i = 0; i < titleNodes.length; i++) {
        texts.push(titleNodes[i].innerHTML);
    }
    return texts;
}

/**
 * Wraps each word in a record of the shape written to the results file.
 *
 * @param {string[]} wordArray Words to convert.
 * @returns {Object[]} One `{type, name, spell}` object per word, where `type`
 *     is always "river", `name` is the word and `spell` is the word's length.
 */
function createFinal(wordArray) {
    return wordArray.reduce(function(records, entry) {
        records.push({"type": "river", "name": entry, "spell": entry.length});
        return records;
    }, []);
}

// Open the dictionary start page.
casper.start('http://dic.daum.net/index.do?dic=kor');


// Click the element with id "searchSubmit" (located via XPath) and log progress.
casper.thenClick(x('//*[@id="searchSubmit"]'), function(){
    console.log('searching');
});

// After a 2000 ms wait, queue a step that stores the result of
// evaluate(getWords) in the shared `words` variable.
casper.wait(2000, function() {
    casper.then(function() {
        words = this.evaluate(getWords);
    });
});

// After a further 3000 ms wait, click the second link under #mArticle/div[2]
// (presumably the next results page -- confirm against the site) and append
// that page's words to the ones collected so far.
casper.wait(3000, function() {
    casper.thenClick(x('//*[@id="mArticle"]/div[2]/a[2]'), function (){
        words = words.concat(this.evaluate(getWords));
    });
});

casper.run(function() {
    // Wrap the collected entries under one top-level key and pretty-print with tabs.
    var result = JSON.stringify({ "my_initial_words": createFinal(words) }, null, '\t');
    this.echo(result);
    // Mode 'a' appends the serialized object after whatever the file already holds.
    fs.write('myresults.json', result, 'a');
    this.exit();
});

This code's problem is, when I have json code like this,

{
    "my_initial_words": [
        {
            "type": "river",
            "name": "apple",
            "spell": "5"
        },
        {
            "type": "river",
            "name": "banana",
            "spell": "6"
        }   
    ]
}

My code appends all of it including the name of json arrays like this!

{
    "my_initial_words": [
        {
            "type": "river",
            "name": "apple",
            "spell": "5"
        },
        {
            "type": "river",
            "name": "banana",
            "spell": "6"
        }   
    ]
}  {
    "my_initial_words": [
        {
            "type": "river",
            "name": "apple",
            "spell": "5"
        },
        {
            "type": "river",
            "name": "banana",
            "spell": "6"
        }   
    ]
}

So I don't want to append the whole object again; I only want to add these elements to the existing array (without repeating the "my_initial_words": [] wrapper):

{"type": "river",   "name": "apple","spell": "5"},
{"type": "river",   "name": "banana","spell": "6"}  

回答1:


Updating an object in file

JSON is defined in such a way that you can't append an object to an existing object and expect to get valid JSON out of it. You can however

  1. read the previous serialized JSON string,
  2. parse it into an object,
  3. append your new values to the array,
  4. serialize the object again and
  5. completely overwrite the existing file with it.

For example like this:

// Read-modify-write: load the stored document, extend its array, then replace the file.
var previousData = JSON.parse(fs.read('myresults.json'));
var mergedWords = previousData.my_initial_words.concat(createFinal(words));
previousData.my_initial_words = mergedWords;
var newData = JSON.stringify(previousData, null, '\t');
// Mode 'w' overwrites, so the file always holds exactly one JSON document.
fs.write('myresults.json', newData, 'w');

Writing chunks to file

If you still want to write your data file as separate chunks of JSON, then you can do this:

// Serialize every item onto its own newline-terminated line
var newItemsString = createFinal(words).map(function(item) {
    return JSON.stringify(item) + "\n";
}).join("");
// Append the new lines after the items already stored in the file
fs.write('myresults.json', newItemsString, 'a');

Each item (word's object) is written on exactly one line. When you read the file in some other process then you can use functions such as readLine() to read exactly one item at a time.

Fixing premature exiting

You also have to keep in mind how you're exiting CasperJS. If you provide a callback to casper.run(), then you need to explicitly call casper.exit() in order to exit the process. The problem is that you're doing that too early:

// Anti-example: .exit() is chained directly onto echo(), so the script is told
// to exit before the serialization and file write below are reached.
this.echo(JSON.stringify(previousData, null, '\t')).exit();
//                                                 ^^^^^^^ calling exit
var newData = JSON.stringify(previousData, null, '\t'); // not executed
fs.write('myresults.json', newData, 'w');  // not executed

Either you need to put the exit at the end of the callback:

// Serialize once, echo and persist the same string, and only then exit.
var newData = JSON.stringify(previousData, null, '\t');
this.echo(newData);
// Write to the same file the script reads from and appends to elsewhere.
fs.write('myresults.json', newData, 'w');
this.exit();

or put your final code into casper.then() instead of casper.run(), and call casper.run() without a callback:

casper.then(function() {
    // Read-modify-write: load the stored document, append the new records to
    // its array, and overwrite the file with the merged result.
    var previousDataString = fs.read('myresults.json');
    var previousData = JSON.parse(previousDataString);
    previousData["my_initial_words"] = previousData["my_initial_words"].concat(createFinal(words));
    // Serialize once so the echoed text and the written file are the same string.
    var newData = JSON.stringify(previousData, null, '\t');
    this.echo(newData);
    fs.write('myresults.json', newData, 'w');
});
// No callback is passed to run(), so no explicit exit() call is needed here.
casper.run();


来源:https://stackoverflow.com/questions/32415747/how-to-append-json-with-no-duplicating-including-casperjs-code

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!