Split a large JSON file into smaller JSON files using Java

旧时模样 提交于 2021-01-29 15:48:15

问题


I have a large dataset in JSON format. For ease of use, I want to split it into multiple JSON files while still maintaining the structure. For example: {"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "krish.lee@learningcontainer.com" }, { "userId": 2, "firstName": "racks", "lastName": "jacson", "phoneNumber": "123456", "emailAddress": "racks.jacson@learningcontainer.com" }, { "userId": 3, "firstName": "denial", "lastName": "roast", "phoneNumber": "33333333", "emailAddress": "denial.roast@learningcontainer.com" }, { "userId": 4, "firstName": "devid", "lastName": "neo", "phoneNumber": "222222222", "emailAddress": "devid.neo@learningcontainer.com" }, { "userId": 5, "firstName": "jone", "lastName": "mac", "phoneNumber": "111111111", "emailAddress": "jone.mac@learningcontainer.com" } ] } I should be able to split it in such a way that each userId goes to a different file. So far, I have tried putting the users into a map and splitting the map, and converting them into an array and splitting the array, without much luck. The files contain the userId, but they are no longer in JSON format. Any suggestions on how this can be achieved in Java?

Expected result: {"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "krish.lee@learningcontainer.com" } ] }


回答1:


To process large files, prefer stream/event-oriented parsing; both Gson and Jackson support this approach. Here is an illustration using a tiny JSON parser, green-jelly (https://github.com/anatolygudkov/green-jelly):

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Splits a JSON document of the form {@code {"users": [ {...}, {...} ]}} into one
 * file per user by listening to streaming parser events and replaying the events
 * of each user object into a JSON generator that writes to that user's file.
 *
 * <p>Each output file contains only the bare user object; the enclosing
 * {@code "users"} member and its array are not reproduced.
 *
 * <p>NOTE(review): {@code JsonParser}, {@code JsonParserListenerAdaptor},
 * {@code JsonGenerator}, {@code AppendableWriter} and {@code JsonNumber} are not
 * imported by this file; presumably they come from the green-jelly library
 * ({@code org.green.jelly}) - confirm and add the imports.
 */
public class SplitMyJson {
    /** Output directory used when no directory is passed on the command line. */
    private static final String DEFAULT_OUTPUT_FOLDER = "/home/gudkov/mytest";

    /** Sample input: a top-level object whose "users" member is an array of user objects. */
    private static final String JSON_TO_SPLIT = "{\"users\": [\n" +
            "    {\n" +
            "      \"userId\": 1,\n" +
            "      \"firstName\": \"Krish\",\n" +
            "      \"lastName\": \"Lee\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"krish.lee@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 2,\n" +
            "      \"firstName\": \"racks\",\n" +
            "      \"lastName\": \"jacson\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"racks.jacson@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 3,\n" +
            "      \"firstName\": \"denial\",\n" +
            "      \"lastName\": \"roast\",\n" +
            "      \"phoneNumber\": \"33333333\",\n" +
            "      \"emailAddress\": \"denial.roast@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 4,\n" +
            "      \"firstName\": \"devid\",\n" +
            "      \"lastName\": \"neo\",\n" +
            "      \"phoneNumber\": \"222222222\",\n" +
            "      \"emailAddress\": \"devid.neo@learningcontainer.com\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 5,\n" +
            "      \"firstName\": \"jone\",\n" +
            "      \"lastName\": \"mac\",\n" +
            "      \"phoneNumber\": \"111111111\",\n" +
            "      \"emailAddress\": \"jone.mac@learningcontainer.com\"\n" +
            "    }\n" +
            "  ]\n" +
            "}";

    /**
     * Parses the sample JSON and writes one file per user.
     *
     * @param args optional; {@code args[0]} is the output directory. When absent,
     *             {@link #DEFAULT_OUTPUT_FOLDER} is used.
     */
    public static void main(String[] args) {
        final String folderName =
                (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_FOLDER;

        final JsonParser parser = new JsonParser();
        parser.setListener(new Splitter(new File(folderName)));
        parser.parse(JSON_TO_SPLIT); // if you read a file, call parse() several times part by part in a loop until EOF
        parser.eoj(); // and then call .eoj()
    }

    /**
     * Parser listener that tracks object nesting depth and copies every event
     * occurring inside a depth-2 object (a user) to a per-user output file named
     * {@code user-<index>.json}, where the index starts at 0.
     */
    static class Splitter extends JsonParserListenerAdaptor {
        private final JsonGenerator jsonGenerator = new JsonGenerator();
        private final AppendableWriter<Writer> appendableWriter = new AppendableWriter<>();

        private final File outputFolder;
        /** Number of currently open JSON objects; the root object is depth 1, a user is depth 2. */
        private int objectDepth;
        /** Index used in the name of the next user file. */
        private int userIndex;

        /**
         * @param outputFolder directory receiving the per-user files; created if missing
         * @throws UncheckedIOException if the directory does not exist and cannot be created
         */
        Splitter(final File outputFolder) {
            this.outputFolder = outputFolder;
            // mkdirs() returns false both on failure and when the directory already
            // exists, so re-check isDirectory() before declaring failure.
            if (!outputFolder.isDirectory() && !outputFolder.mkdirs() && !outputFolder.isDirectory()) {
                throw new UncheckedIOException(
                        new IOException("Cannot create output folder: " + outputFolder));
            }

            jsonGenerator.setOutput(appendableWriter);
        }

        /** True right after the depth counter was raised by the opening of a user object. */
        private boolean userJustStarted() {
            return objectDepth == 2;
        }

        /** True right after the depth counter was lowered by the closing of a user object. */
        private boolean userJustEnded() {
            return objectDepth == 1;
        }

        /** True while the current event is outside any user object and must not be copied. */
        private boolean notInUser() {
            return objectDepth < 2;
        }

        /**
         * Opens a new UTF-8 output file when a user object starts, and forwards the
         * object start for the user object and any object nested inside it.
         *
         * @throws UncheckedIOException if the user file cannot be opened
         */
        @Override
        public boolean onObjectStarted() {
            objectDepth++;

            if (notInUser()) {
                return true;
            }

            if (userJustStarted()) {
                final File userFile = new File(outputFolder, "user-" + userIndex + ".json");
                try {
                    // Explicit UTF-8: FileWriter would use the platform default charset.
                    appendableWriter.set(
                            Files.newBufferedWriter(userFile.toPath(), StandardCharsets.UTF_8));
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                userIndex++;
            }
            jsonGenerator.startObject();
            return true;
        }

        /**
         * Forwards the object end when inside a user and, when the user object itself
         * has just closed, finishes the generated document and closes the file.
         *
         * @throws UncheckedIOException if closing the user file fails
         */
        @Override
        public boolean onObjectEnded() {
            // Decide before decrementing: the closing brace belongs to the depth it closes.
            final boolean outsideUser = notInUser();
            objectDepth--;
            if (outsideUser) {
                return true;
            }

            jsonGenerator.endObject();

            if (userJustEnded()) { // user object ended
                // try-with-resources closes the writer even if eoj() throws.
                try (Writer userOutput = appendableWriter.output()) {
                    jsonGenerator.eoj();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            return true;
        }

        /** Forwards an array start only when it occurs inside a user object. */
        @Override
        public boolean onArrayStarted() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.startArray();
            return true;
        }

        /** Forwards an array end only when it occurs inside a user object. */
        @Override
        public boolean onArrayEnded() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.endArray();
            return true;
        }

        /** Forwards a member name only when it occurs inside a user object. */
        @Override
        public boolean onObjectMember(final CharSequence name) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.objectMember(name);
            return true;
        }

        /** Forwards a string value only when it occurs inside a user object. */
        @Override
        public boolean onStringValue(final CharSequence data) {
            if (notInUser()) {
                return true;
            }
            // NOTE(review): the meaning of the 'true' flag is defined by JsonGenerator
            // (presumably escaping) - confirm against the library documentation.
            jsonGenerator.stringValue(data, true);
            return true;
        }

        /** Forwards a number value only when it occurs inside a user object. */
        @Override
        public boolean onNumberValue(final JsonNumber number) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.numberValue(number);
            return true;
        }

        /** Forwards a {@code true} literal only when it occurs inside a user object. */
        @Override
        public boolean onTrueValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.trueValue();
            return true;
        }

        /** Forwards a {@code false} literal only when it occurs inside a user object. */
        @Override
        public boolean onFalseValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.falseValue();
            return true;
        }

        /** Forwards a {@code null} literal only when it occurs inside a user object. */
        @Override
        public boolean onNullValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.nullValue();
            return true;
        }
    }
}

In this way you can easily implement filtering, aggregation, etc. for really large files with the highest performance possible in regular Java.



来源:https://stackoverflow.com/questions/61868825/split-a-large-json-file-into-smaller-json-files-using-java

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!