问题
I am using the following code to convert an xls to csv and I get the error : Initialisation of record 0x5B left 1 bytes remaining still to be read. I thought it might be the file, I have tried different files and it's not the case.
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
public class Excel2Txt{
public static void convertExcelToCSV(String fileName) throws InvalidFormatException, IOException {
BufferedWriter output = new BufferedWriter(new FileWriter(fileName.substring(0, fileName.lastIndexOf(".")) + ".csv"));
InputStream is = new FileInputStream(fileName);
Workbook wb = WorkbookFactory.create(is);
Sheet sheet = wb.getSheetAt(0);
// hopefully the first row is a header and has a full compliment of
// cells, else you'll have to pass in a max (yuck)
int maxColumns = sheet.getRow(0).getLastCellNum();
for (Row row : sheet) {
// row.getFirstCellNum() and row.getLastCellNum() don't return the
// first and last index when the first or last column is blank
int minCol = 0; // row.getFirstCellNum()
int maxCol = maxColumns; // row.getLastCellNum()
for (int i = minCol; i < maxCol; i++) {
Cell cell = row.getCell(i);
String buf = "";
if (i > 0) {
buf = ",";
}
if (cell == null) {
output.write(buf);
//System.out.print(buf);
} else {
String v = null;
switch (cell.getCellType()) {
case Cell.CELL_TYPE_STRING:
v = cell.getRichStringCellValue().getString();
break;
case Cell.CELL_TYPE_NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
v = cell.getDateCellValue().toString();
} else {
v = String.valueOf(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_BOOLEAN:
v = String.valueOf(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_FORMULA:
v = cell.getCellFormula();
break;
default:
}
if (v != null) {
buf = buf + toCSV(v);
}
output.write(buf);
//System.out.print(buf);
}
}
output.write("\n");
//System.out.println();
}
is.close();
output.close();
}
public static String toCSV(String value) {
String v = null;
boolean doWrap = false;
if (value != null) {
v = value;
if (v.contains("\"")) {
v = v.replace("\"", "\"\""); // escape embedded double quotes
doWrap = true;
}
if (v.contains(",") || v.contains("\n")) {
doWrap = true;
}
if (doWrap) {
v = "\"" + v + "\""; // wrap with double quotes to hide the comma
}
}
return v;
}
public static void main(String[] args) throws IOException {
try {
convertExcelToCSV("\\C:\\Data\\test.xls");
} catch (InvalidFormatException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
回答1:
Load your excel file and save it to some newer version of excel. Then test with this new file. And use the latest release of POI.
回答2:
Another solution which may be of interest to anyone googleing this is to use libre office headless convert which will work on Linux. Pasted and modified code
String[] commandArray = new String[]{
"libreoffice"
,"--headless"
,"--convert-to"
,"xlsx:Calc MS Excel 2007 XML"
,"--outdir"
,Paths.get(file.getPath()).getParent().toString()
,file.getPath()
};
Runtime rt = Runtime.getRuntime();
Process proc = rt.exec(commandArray);
InputStream stin = proc.getInputStream();
InputStreamReader isr = new InputStreamReader(stin);
BufferedReader br = new BufferedReader(isr);
String line;
System.out.println("input");
while ((line = br.readLine()) != null) {
System.out.println(line);
}
stin = proc.getErrorStream();
isr = new InputStreamReader(stin);
br = new BufferedReader(isr);
while ((line = br.readLine()) != null) {
System.out.println(line);
}
int exitVal = proc.waitFor();
System.out.println("Process exitValue: " + exitVal);
OPCPackage opcPackage = OPCPackage.open(new File(file.getPath() + "x"));
book = new XSSFWorkbook(opcPackage);
来源:https://stackoverflow.com/questions/20010854/initialisation-of-record-0x5b-left-1-bytes-remaining-still-to-be-read-apache-po