How to split an Excel file into multiple files based on row count using Apache POI?

依然范特西╮ 提交于 2019-12-05 09:19:04

I've got you.

package com.industries.seanimus;  

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;  

/**
 * Splits the first sheet of an .xlsx workbook into several workbooks of at
 * most {@code maxRows} rows each and writes them next to the source file as
 * {@code <name>_1.xlsx}, {@code <name>_2.xlsx}, and so on.
 *
 * <p>All work happens in the constructor; I/O and format errors are printed
 * to standard error rather than propagated.
 */
public class ReportSplitter {

    private static final String XLSX_SUFFIX = ".xlsx";

    private final String fileName;
    private final int maxRows;

    /**
     * Reads {@code fileName} and, if its first sheet holds more than
     * {@code maxRows} rows, writes the split workbooks to disk.
     *
     * @param fileName path of the source .xlsx file
     * @param maxRows  maximum number of rows per output workbook; must be at least 1
     * @throws IllegalArgumentException if {@code maxRows} is less than 1
     */
    public ReportSplitter(String fileName, final int maxRows) {
        if (maxRows < 1) {
            throw new IllegalArgumentException("maxRows must be at least 1 but was " + maxRows);
        }

        /*
         * Turns off POI's inflate-ratio ("zip bomb") check, which style-heavy
         * workbooks can trip. NOTE(review): this is a static, process-wide
         * switch; do not run this against untrusted files.
         */
        ZipSecureFile.setMinInflateRatio(0);

        this.fileName = fileName;
        this.maxRows = maxRows;

        try {
            /* Read in the original Excel file. */
            OPCPackage pkg = OPCPackage.open(new File(fileName));
            try {
                XSSFWorkbook workbook = new XSSFWorkbook(pkg);
                XSSFSheet sheet = workbook.getSheetAt(0);

                /* Only split if there are more rows than the desired amount. */
                if (sheet.getPhysicalNumberOfRows() > maxRows) {
                    List<SXSSFWorkbook> wbs = splitWorkbook(workbook);
                    writeWorkBooks(wbs);
                }
            } finally {
                /*
                 * The source is only read, so release it without saving.
                 * close() on a package opened this way writes it back to disk.
                 */
                pkg.revert();
            }
        }
        catch (EncryptedDocumentException | IOException | InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the rows of the first sheet of {@code workbook} into a series of
     * streaming workbooks, starting a new one every {@code maxRows} rows.
     *
     * @param workbook the source workbook
     * @return the split workbooks in source order; empty if the sheet has no rows
     */
    private List<SXSSFWorkbook> splitWorkbook(XSSFWorkbook workbook) {

        List<SXSSFWorkbook> workbooks = new ArrayList<>();

        SXSSFWorkbook wb = new SXSSFWorkbook();
        SXSSFSheet sh = wb.createSheet();
        /* Source style index -> cloned style in the current target workbook. */
        Map<Short, CellStyle> styles = new HashMap<>();

        int rowCount = 0;

        XSSFSheet sheet = workbook.getSheetAt(0);

        for (Row row : sheet) {
            /*
             * Roll over BEFORE creating the row, so the row, its cells and
             * their styles all belong to the same workbook.
             */
            if (rowCount == maxRows) {
                workbooks.add(wb);
                wb = new SXSSFWorkbook();
                sh = wb.createSheet();
                styles = new HashMap<>();
                rowCount = 0;
            }

            SXSSFRow newRow = sh.createRow(rowCount++);

            for (Cell cell : row) {
                /* Keep the source column so sparse rows are not shifted left. */
                SXSSFCell newCell = newRow.createCell(cell.getColumnIndex());
                setValue(newCell, cell);
                newCell.setCellStyle(targetStyle(wb, styles, cell.getCellStyle()));
            }
        }

        /* Only add the last workbook if it has content */
        if (rowCount > 0) {
            workbooks.add(wb);
        } else {
            wb.dispose();
        }
        return workbooks;
    }

    /**
     * Returns the style in {@code wb} that mirrors {@code source}, cloning it
     * on first use. Reusing one clone per distinct source style keeps the
     * number of styles in the target workbook bounded.
     */
    private static CellStyle targetStyle(SXSSFWorkbook wb, Map<Short, CellStyle> cache,
            CellStyle source) {
        Short key = source.getIndex();
        CellStyle copy = cache.get(key);
        if (copy == null) {
            copy = wb.createCellStyle();
            copy.cloneStyleFrom(source);
            cache.put(key, copy);
        }
        return copy;
    }

    /**
     * Copies the value of {@code cell} into {@code newCell}, choosing the
     * setter that matches the source cell type.
     *
     * @param newCell the freshly created target cell
     * @param cell    the source cell
     * @return {@code newCell}
     */
    private SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING:
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC: {
            double serial = cell.getNumericCellValue();
            /*
             * Serial values below 1 are time-only. Presumably routing them
             * through a java.util.Date loses the time (reported by users of
             * this code), so they are copied as raw numbers; the cloned cell
             * style keeps the time format.
             */
            if (DateUtil.isCellDateFormatted(cell) && serial >= 1.0) {
                newCell.setCellValue(cell.getDateCellValue());
            } else {
                newCell.setCellValue(serial);
            }
            break;
        }
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            /* A newly created cell is already blank. */
            break;
        case Cell.CELL_TYPE_ERROR:
            newCell.setCellErrorValue(cell.getErrorCellValue());
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }

    /**
     * Writes every workbook to {@code <base>_<n>.xlsx}, where {@code <base>}
     * is the source file name without its .xlsx suffix and {@code n} starts
     * at 1. Stops at the first I/O error. The temporary files backing the
     * streaming workbooks are always released.
     */
    private void writeWorkBooks(List<SXSSFWorkbook> wbs) {
        String baseName = baseName(fileName);
        try {
            for (int i = 0; i < wbs.size(); i++) {
                File target = new File(baseName + "_" + (i + 1) + XLSX_SUFFIX);
                try (FileOutputStream out = new FileOutputStream(target)) {
                    wbs.get(i).write(out);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            for (SXSSFWorkbook wb : wbs) {
                wb.dispose();
            }
        }
    }

    /**
     * Strips a trailing ".xlsx" (any case) from {@code name}; names without
     * that suffix are returned unchanged.
     */
    private static String baseName(String name) {
        int cut = name.length() - XLSX_SUFFIX.length();
        if (cut >= 0 && name.regionMatches(true, cut, XLSX_SUFFIX, 0, XLSX_SUFFIX.length())) {
            return name.substring(0, cut);
        }
        return name;
    }

    public static void main(String[] args){
        /* This will create a new workbook every 1000 rows. */
        new ReportSplitter("Data.xlsx", 1000);
    }

}

A few notes:

  • For writing the workbooks, I use SXSSFWorkbook. It streams rows to disk instead of holding the whole workbook in memory before writing, so it is a lot faster than HSSF or XSSF for large files and avoids a horrible GC mess.

  • The Busy Developer's Guide is your friend for learning Apache POI ;)

ENJOY!


EDIT: I've updated the code to copy cell styles as well. Two things to note about this:
  • Copying styles will SLOW things down considerably.
  • POI creates a template file that may become too big to be uncompressed, throwing a Zip bomb detected error. You can fix this by changing the minimum inflation ratio via ZipSecureFile.setMinInflateRatio(0).

Thanks for your code. Just my two cents: the code above does not copy time-only values, so I made a small modification to setValue to handle time columns. Basically, I check whether a date-formatted cell is a time column by looking at its year, which is 1899 for time-only values. Hope it helps :)

 /**
  * Copies the value of {@code cell} into {@code newCell}, choosing the
  * setter that matches the source cell type. Date-formatted cells whose
  * date falls in the year 1899 are treated as time-only values.
  *
  * @param newCell the freshly created target cell
  * @param cell    the source cell
  * @return {@code newCell}
  */
 private static SXSSFCell setValue(SXSSFCell newCell, Cell cell) {
        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_STRING:
            newCell.setCellValue(cell.getRichStringCellValue().getString());
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (DateUtil.isCellDateFormatted(cell)) {
                Date value = cell.getDateCellValue();
                String year = new SimpleDateFormat("yyyy").format(value);
                if ("1899".equals(year)) {
                    /*
                     * Time-only value: copy the raw serial number. Going
                     * through an "hh:mm:ss" string uses a 12-hour clock, so
                     * it cannot tell AM from PM, and it also drops
                     * fractional seconds.
                     */
                    newCell.setCellValue(cell.getNumericCellValue());
                } else {
                    newCell.setCellValue(value);
                }
            } else {
                newCell.setCellValue(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            newCell.setCellValue(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            newCell.setCellFormula(cell.getCellFormula());
            break;
        case Cell.CELL_TYPE_BLANK:
            /* A newly created cell is already blank. */
            break;
        default:
            System.out.println("Could not determine cell type");
        }
        return newCell;
    }
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!