Error While Reading Large Excel Files (xlsx) Via Apache POI

后端 未结 4 1653
旧巷少年郎
旧巷少年郎 2020-11-29 07:31

I am trying to read large Excel files (.xlsx, around 40-50 MB) with Apache POI, but I get an out-of-memory exception. The current heap size is 3 GB.

I can read smaller exc

4条回答
  •  不知归路
    2020-11-29 08:32

    In the example below I include complete code that parses an entire Excel file (60 MB in my case) into a list of objects without any "out of memory" problem, and it works fine:

    import java.util.ArrayList;
    import java.util.List;
    
    
    /**
     * Reads every sheet of an .xlsx workbook through Apache POI's SAX-based event API
     * and hands each sheet to a {@code MappingFromXml} handler, which appends the
     * mapped rows to {@link #resultMapping}.
     */
    class DistinctByProperty {

        /** Destination for the per-sheet progress messages. */
        private static final PrintStream output = System.out;

        /** Receives the objects produced by the sheet handler; shared by all sheets. */
        private static final List<Object> resultMapping = new ArrayList<>();

        /**
         * Prints the size of a hard-coded workbook in megabytes, then parses it.
         *
         * @param args unused
         * @throws IOException if the workbook cannot be read or parsed
         */
        public static void main(String[] args) throws IOException {

            File file = new File("C:\\Users\\aberguig032018\\Downloads\\your_excel.xlsx");

            double bytes = file.length();
            double kilobytes = (bytes / 1024);
            double megabytes = (kilobytes / 1024);
            System.out.println("Size "+megabytes);

            parseExcel(file);
        }

        /**
         * Opens the workbook read-only and processes its sheets one after another.
         *
         * @param file the .xlsx file to parse
         * @throws IOException if the file cannot be read, or if it is not a valid
         *                     OOXML package / its XML cannot be parsed (the original
         *                     exception is attached as the cause)
         */
        public static void parseExcel(File file) throws IOException {
            // Local instead of a static field: the package must not outlive this call.
            OPCPackage xlsxPackage = null;
            try {
                xlsxPackage = OPCPackage.open(file.getAbsolutePath(), PackageAccess.READ);
                ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
                XSSFReader xssfReader = new XSSFReader(xlsxPackage);
                StylesTable styles = xssfReader.getStylesTable();
                XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
                int index = 0;
                while (iter.hasNext()) {
                    // Each sheet stream is closed as soon as the sheet has been parsed.
                    try (InputStream stream = iter.next()) {
                        String sheetName = iter.getSheetName();
                        output.println();
                        output.println(sheetName + " [index=" + index + "]:");
                        processSheet(styles, strings, new MappingFromXml(resultMapping), stream);
                    }
                    ++index;
                }
            } catch (OpenXML4JException | SAXException e) {
                // Surface parse failures to the caller instead of printing and
                // returning as if the (possibly partial) result were complete.
                throw new IOException("Failed to parse " + file.getAbsolutePath(), e);
            } finally {
                if (xlsxPackage != null) {
                    // Package was opened read-only: release it without saving anything.
                    xlsxPackage.revert();
                }
            }
        }

        /**
         * Parses one sheet's XML and forwards its rows and cells to the given handler.
         *
         * @param styles           styles table of the workbook
         * @param strings          shared strings table of the workbook
         * @param mappingFromXml   handler that receives the row/cell callbacks
         * @param sheetInputStream raw XML stream of the sheet; not closed here
         * @throws IOException  if reading the stream fails
         * @throws SAXException if the sheet XML cannot be parsed
         */
        private static void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, MappingFromXml mappingFromXml, InputStream sheetInputStream) throws IOException, SAXException {
            DataFormatter formatter = new DataFormatter();
            InputSource sheetSource = new InputSource(sheetInputStream);
            try {
                XMLReader sheetParser = SAXHelper.newXMLReader();
                ContentHandler handler = new XSSFSheetXMLHandler(
                        styles, null, strings, mappingFromXml, formatter, false);

                sheetParser.setContentHandler(handler);
                sheetParser.parse(sheetSource);
                System.out.println("Size of Array "+resultMapping.size());
            } catch (ParserConfigurationException e) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
            }
        }
    }
    

    You also have to add a class that implements

    SheetContentsHandler

    import com.sun.org.apache.xpath.internal.operations.Bool;
    import org.apache.poi.ss.util.CellAddress;
    import org.apache.poi.ss.util.CellReference;
    import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
    
    import org.apache.poi.xssf.usermodel.XSSFComment;
    
    import java.io.PrintStream;
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Sheet-contents callback that builds one {@code myObject} per data row and
     * appends it to the list supplied by the caller.
     *
     * <p>Row 0 is treated as the header row: its cells are ignored and no object
     * is added to the result for it.
     */
    public class MappingFromXml implements SheetContentsHandler {

        /** Caller-owned list that receives one object per data row. */
        private final List<? super myObject> result;
        /** Object being filled for the current row; {@code null} between rows. */
        private myObject myObject = null;
        /** Zero-based index of the row currently being read. */
        private int lineNumber = 0;
        /**
         * Destination for data
         */
        private final PrintStream output = System.out;

        /**
         * @param list list the mapped rows are appended to; it is used directly,
         *             not copied
         */
        public MappingFromXml(List<? super myObject> list) {
            this.result = list;
        }

        /** Remembers the row index and starts a fresh object for the row. */
        @Override
        public void startRow(int i) {
            output.println("iii " + i);
            lineNumber = i;
            myObject = new myObject();
        }

        /** Appends the finished row object to the result, skipping the header row. */
        @Override
        public void endRow(int i) {
            output.println("jjj " + i);
            // cell() never populates row 0, so adding it would put an empty
            // object at the head of the result.
            if (lineNumber > 0) {
                result.add(myObject);
            }
            myObject = null;
        }

        /**
         * Copies the cell value into the current row object, based on the cell's
         * column index. Cells of the header row (row 0) are ignored.
         *
         * @param cellReference  A1-style reference of the cell
         * @param formattedValue cell value as formatted text
         * @param comment        cell comment; unused
         */
        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            int columnIndex = (new CellReference(cellReference)).getCol();

            if (lineNumber > 0) {
                switch (columnIndex) {
                    case 0: {//Tech id
                        // NOTE(review): parseInt throws NumberFormatException when the
                        // formatted text is not a plain integer - confirm the column format.
                        if (formattedValue != null && !formattedValue.isEmpty()) {
                            myObject.setId(Integer.parseInt(formattedValue));
                        }
                    }
                    break;
                    //TODO add other cell
                }
            }
        }

        /** Headers and footers are not needed for the mapping; intentionally ignored. */
        @Override
        public void headerFooter(String s, boolean b, String s1) {

        }
    }
    

    For more information, visit this link

提交回复
热议问题