【推荐】2019 Java 开发者跳槽指南.pdf(吐血整理) >>>
本文示例基于 Apache POI 3.9;第 6 节读取 PDF 使用的是 Apache PDFBox,而不是 POI。
操作之前获取文件输入流对象
// Open the source file as a raw byte stream; the extraction snippets read from fis.
FileInputStream fis;
try {
    fis = new FileInputStream(file);
} catch (FileNotFoundException ignored) {
    // The file could not be opened, so there is nothing to extract: stop here.
    return;
}
1.获取word2003及以前版本内容。
// Binary .doc: WordExtractor reads the document from the stream and
// getText() returns its text as a single String.
String result = new WordExtractor(fis).getText();
2.获取word2007及以后版本(.docx)内容。
// OOXML .docx: load the document from the stream first, then hand it to the
// extractor, whose getText() returns the document text as a single String.
XWPFDocument docx = new XWPFDocument(fis);
String result = new XWPFWordExtractor(docx).getText();
3.获取excel2003及以前版本内容。
// Excel 97-2003 (.xls): visit every sheet, row and cell and collect the cell
// values in sb as tab-separated text, one line per row.
POIFSFileSystem poifsFileSystem = new POIFSFileSystem(fis);
StringBuffer sb = new StringBuffer();
HSSFWorkbook wb = new HSSFWorkbook(poifsFileSystem);
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    HSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound has to
    // be inclusive; a strict "<" silently dropped the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        HSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            // Gaps between populated rows come back as null.
            continue;
        }
        // Unlike getLastRowNum(), getLastCellNum() is already "last index + 1"
        // (or -1 for a row without cells), so a strict "<" is correct here.
        int cellCount = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < cellCount; sheetCol++) {
            HSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            // Blank and error cells are intentionally skipped, as before.
            switch (aCell.getCellType()) {
                case HSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case HSSFCell.CELL_TYPE_FORMULA:
                    // The formula text itself is emitted, not its evaluated value.
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    break;
            }
        }
        // Terminate the line once per row that has at least one cell slot.
        if (cellCount > 0) {
            sb.append("\n");
        }
    }
}
4.获取excel2007及以后版本(.xlsx)内容。
// Excel 2007+ (.xlsx): visit every sheet, row and cell and collect the cell
// values in sb as tab-separated text, one line per row.
XSSFWorkbook wb = new XSSFWorkbook(fis);
StringBuffer sb = new StringBuffer();
for (int sheetNum = 0; sheetNum < wb.getNumberOfSheets(); sheetNum++) {
    XSSFSheet sheet = wb.getSheetAt(sheetNum);
    if (sheet == null) {
        continue;
    }
    // getLastRowNum() is the 0-based index of the last row, so the bound has to
    // be inclusive; a strict "<" silently dropped the final row of every sheet.
    for (int sheetRow = 0; sheetRow <= sheet.getLastRowNum(); sheetRow++) {
        XSSFRow row = sheet.getRow(sheetRow);
        if (row == null) {
            // Gaps between populated rows come back as null.
            continue;
        }
        // Unlike getLastRowNum(), getLastCellNum() is already "last index + 1"
        // (or -1 for a row without cells), so a strict "<" is correct here.
        int cellCount = row.getLastCellNum();
        for (int sheetCol = 0; sheetCol < cellCount; sheetCol++) {
            XSSFCell aCell = row.getCell(sheetCol);
            if (aCell == null) {
                continue;
            }
            // The CELL_TYPE_* constants come from the shared Cell interface, so
            // they are referenced through XSSFCell rather than the HSSF class.
            // Blank and error cells are intentionally skipped, as before.
            switch (aCell.getCellType()) {
                case XSSFCell.CELL_TYPE_NUMERIC:
                    sb.append(aCell.getNumericCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_BOOLEAN:
                    sb.append(aCell.getBooleanCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_STRING:
                    sb.append(aCell.getStringCellValue()).append("\t");
                    break;
                case XSSFCell.CELL_TYPE_FORMULA:
                    // The formula text itself is emitted, not its evaluated value.
                    sb.append(aCell.getCellFormula()).append("\t");
                    break;
                default:
                    break;
            }
        }
        // Terminate the line once per row that has at least one cell slot.
        if (cellCount > 0) {
            sb.append("\n");
        }
    }
}
5.获取ppt文件(PowerPoint 97-2003 的 .ppt 格式)内容。
// PowerPoint 97-2003 (.ppt): collect each slide's title and text runs in sb,
// tab-separated, one line per slide.
StringBuffer sb = new StringBuffer();
SlideShow ss = new SlideShow(new HSLFSlideShow(fis));
for (Slide slide : ss.getSlides()) {
    // getTitle() returns null for a slide without a title; appending it
    // unchecked would write the literal text "null" into the output.
    String title = slide.getTitle();
    if (title != null) {
        // Tab-terminate the title like every other field so it does not run
        // straight into the first text run.
        sb.append(title).append("\t");
    }
    for (TextRun run : slide.getTextRuns()) {
        sb.append(run.getText()).append("\t");
    }
    sb.append("\n");
}
6. 获取pdf文件内容。
// PDF (Apache PDFBox): parse the stream into a document and strip its text
// into result.
PDFParser parser = new PDFParser(fis);
parser.parse();
PDDocument pdDocument = parser.getPDDocument();
String result;
try {
    PDFTextStripper stripper = new PDFTextStripper();
    result = stripper.getText(pdDocument);
} finally {
    // A parsed PDDocument must be closed explicitly; do it even when text
    // extraction throws so the document's resources are not leaked.
    pdDocument.close();
}
来源:oschina
链接:https://my.oschina.net/u/942785/blog/119623