java操作wordexcel.docx
- 文档编号:12369395
- 上传时间:2023-04-18
- 格式:DOCX
- 页数:15
- 大小:17.77KB
java操作wordexcel.docx
《java操作wordexcel.docx》由会员分享,可在线阅读,更多相关《java操作wordexcel.docx(15页珍藏版)》请在冰豆网上搜索。
java操作wordexcel
JAVA读取WORD,EXCEL,POWERPOINT,PDF文件的方法
OFFICE 文档使用 POI 控件读取;PDF 可以使用 PDFBox 0.7.3 控件,完全支持中文。用 XPDF 也行,不过感觉 PDFBox 比较好,而且作者也在持续更新。
水平有限,万望各位指正
WORD:
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.poi.hwpf.extractor.WordExtractor;
importjava.io.File;
importjava.io.InputStream;
importjava.io.FileInputStream;
importcom.search.code.Index;
publicDocumentgetDocument(Indexindex,Stringurl,Stringtitle,InputStreamis)throwsDocCenterException{
StringbodyText=null;
try{
WordExtractorex=newWordExtractor(is);//is是WORD文件的InputStream
bodyText=ex.getText();
if(!
bodyText.equals("")){
index.AddIndex(url,title,bodyText);
}
}catch(DocCenterExceptione){
thrownewDocCenterException("无法从该MocriosoftWord文档中提取内容",e);
}catch(Exceptione){
e.printStackTrace();
}
}
returnnull;
}
Excel:
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.poi.hwpf.extractor.WordExtractor;
importorg.apache.poi.hssf.usermodel.HSSFWorkbook;
importorg.apache.poi.hssf.usermodel.HSSFSheet;
importorg.apache.poi.hssf.usermodel.HSSFRow;
importorg.apache.poi.hssf.usermodel.HSSFCell;
importjava.io.File;
importjava.io.InputStream;
importjava.io.FileInputStream;
importcom.search.code.Index;
publicDocumentgetDocument(Indexindex,Stringurl,Stringtitle,InputStreamis)throwsDocCenterException{
StringBuffercontent=newStringBuffer();
try{HSSFWorkbookworkbook=newHSSFWorkbook(is);//创建对Excel工作簿文件的引用
for(intnumSheets=0;numSheets if(null! =workbook.getSheetAt(numSheets)){ HSSFSheetaSheet=workbook.getSheetAt(numSheets);//获得一个sheet for(introwNumOfSheet=0;rowNumOfSheet<=aSheet.getLastRowNum();rowNumOfSheet++){ if(null! =aSheet.getRow(rowNumOfSheet)){ HSSFRowaRow=aSheet.getRow(rowNumOfSheet);//获得一个行 for(shortcellNumOfRow=0;cellNumOfRow<=aRow.getLastCellNum();cellNumOfRow++){ if(null! =aRow.getCell(cellNumOfRow)){ HSSFCellaCell=aRow.getCell(cellNumOfRow);//获得列值 content.append(aCell.getStringCellValue()); } } } } } } if(! content.equals("")){ index.AddIndex(url,title,content.toString()); } }catch(DocCenterExceptione){ thrownewDocCenterException("无法从该MocriosoftWord文档中提取内容",e); }catch(Exceptione){ System.out.println("已运行xlRead(): "+e); } returnnull; } PowerPoint: importjava.io.InputStream; importorg.apache.lucene.document.Document; importorg.apache.poi.hslf.HSLFSlideShow; importorg.apache.poi.hslf.model.TextRun; importorg.apache.poi.hslf.model.Slide; importorg.apache.poi.hslf.usermodel.SlideShow; publicDocumentgetDocument(Indexindex,Stringurl,Stringtitle,InputStreamis) throwsDocCenterException{ StringBuffercontent=newStringBuffer(""); try{SlideShowss=newSlideShow(newHSLFSlideShow(is));//is为文件的InputStream,建立SlideShow Slide[]slides=ss.getSlides();//获得每一张幻灯片 for(inti=0;i TextRun[]t=slides[i].getTextRuns();//为了取得幻灯片的文字内容,建立TextRun for(intj=0;j content.append(t[j].getText());//这里会将文字内容加到content中去 } content.append(slides[i].getTitle()); } index.AddIndex(url,title,content.toString()); }catch(Exceptionex){ System.out.println(ex.toString()); } returnnull; } PDF: importjava.io.InputStream; importjava.io.IOException; importorg.apache.lucene.document.Document; importorg.pdfbox.cos.COSDocument; importorg.pdfbox.pdfparser.PDFParser; importorg.pdfbox.pdmodel.PDDocument; importorg.pdfbox.pdmodel.PDDocumentInformation; importorg.pdfbox.util.PDFTextStripper; importcom.search.code.Index; publicDocumentgetDocument(Indexindex,Stringurl,Stringtitle,InputStreamis)throwsDocCenterException{ 
COSDocumentcosDoc=null; try{ cosDoc=parseDocument(is); }catch(IOExceptione){ closeCOSDocument(cosDoc); thrownewDocCenterException("无法处理该PDF文档",e); } if(cosDoc.isEncrypted()){ if(cosDoc! =null) closeCOSDocument(cosDoc); thrownewDocCenterException("该PDF文档是加密文档,无法处理"); } StringdocText=null; try{ PDFTextStripperstripper=newPDFTextStripper(); docText=stripper.getText(newPDDocument(cosDoc)); }catch(IOExceptione){ closeCOSDocument(cosDoc); thrownewDocCenterException("无法处理该PDF文档",e); } PDDocumentpdDoc=null; try{ pdDoc=newPDDocument(cosDoc); PDDocumentInformationdocInfo=pdDoc.getDocumentInformation(); if(docInfo.getTitle()! =null&&! docInfo.getTitle().equals("")){ title=docInfo.getTitle(); } }catch(Exceptione){ closeCOSDocument(cosDoc); closePDDocument(pdDoc); System.err.println("无法取得该PDF文档的元数据"+e.getMessage()); }finally{ closeCOSDocument(cosDoc); closePDDocument(pdDoc); } returnnull; } privatestaticCOSDocumentparseDocument(InputStreamis)throwsIOException{ PDFParserparser=newPDFParser(is); parser.parse(); returnparser.getDocument(); } privatevoidcloseCOSDocument(COSDocumentcosDoc){ if(cosDoc! =null){ try{ cosDoc.close(); }catch(IOExceptione){ } } } privatevoidclosePDDocument(PDDocumentpdDoc){ if(pdDoc! =null){ try{ pdDoc.close(); }catch(IOExceptione){ } } } 用POI吧,我给你一个写的例子: publicclassGetExcelValue { privateStringexcelfile;//导入excel文件的名称和路径 //privateshortReportValueLength; privateintRecordCount; privatedouble[]ReportValue;//=newdouble[100];//返回excel文件的各个单元格内容到数组 privateStringMessage;//出现错误时返回的错误消息 privateintReturnCode;//调用的返回值,0为正常,1为异常 publicGetExcelValue(StringFileName,intReportLength) { excelfile=FileName; //初始化返回变量 Message=""; ReturnCode=0; RecordCount=0; ReportValue=newdouble[ReportLength]; } publicvoidOutPutExcelValue()//取得excel报表的内容 { try { //创建对Excel工作簿文件的引用 HSSFWorkbookworkbook=newHSSFWorkbook(newFileInputStream(excelfile)); //.println("===SheetsNum==="+workbook.getNumberOfSheets());//获取sheet数 for(intnumSheets=0;numSheets { if(null! 
=workbook.getSheetAt(numSheets)) { HSSFSheetaSheet=workbook.getSheetAt(numSheets);//获得一个sheet //System.out.println("+++getFirstRowNum+++"+ //aSheet.getFirstRowNum());// //System.out.println("+++getLastRowNum+++"+ //aSheet.getLastRowNum());取得一个sheet的行数,注意行数从0开始计数 //RecordCount=aSheet.getLastRowNum()+1; for(introwNumOfSheet=0;rowNumOfSheet<=aSheet.getLastRowNum();rowNumOfSheet++) { if(null! =aSheet.getRow(rowNumOfSheet)) { HSSFRowaRow=aSheet.getRow(rowNumOfSheet); //System.out.println(">>>getFirstCellNum<<<"+ //aRow.getFirstCellNum()); //System.out.println(">>>getLastCellNum<<<"+ //aRow.getLastCellNum());取得一行的单元割格数,注意行数从1开始计数 for(shortcellNumOfRow=0;cellNumOfRow<=(aRow.getLastCellNum()-1);cellNumOfRow++) { if(null! =aRow.getCell(cellNumOfRow)) { HSSFCellaCell=aRow.getCell(cellNumOfRow); intcellType=aCell.getCellType(); //System.out.println(cellType); switch(cellType) { case0: //Numeric,CELL_TYPE_NUMERIC StringCheckLength=String.valueOf(aCell.getNumericCellValue()); if(CheckLength.length()>18) { Message="错误,第"+(rowNumOfSheet+1)+"行"+(cellNumOfRow+1)+"列"+"单元格的内容超过最大长度"; ReturnCode=1; return; } ReportValue[cellNumOfRow]=aCell.getNumericCellValue(); Message=String.valueOf(ReportValue[cellNumOfRow]); //System.out.println(strCell); break; case1: //String StringstrCell=aCell.getStringCellValue(); Stringtempstr=strCell.trim(); if(tempstr.length()==0) { ReportValue[cellNumOfRow]=0; } else { Message="错误,第"+(rowNumOfSheet+1)+"行"+(cellNumOfRow+1)+"列"+"单元格的内容不对,注意只可以是数字或空格或不输入! "; ReturnCode=1; return; } //System.out.println(strCell); break; case3: ReportValue[cellNumOfRow]=0; break; default: Message="错误,第"+(rowNumOfSheet+1)+"行"+(cellNumOfRow+1)+"列"+"单元格的内容不对,注意只可以是数字或空格或不输入! 
"; ReturnCode=1; return; //System.out.println("格式不对不读");//其它格式的数据 } } } } RecordCount=rowNumOfSheet+1;//取得成功处理的数据条数 } } } //System.out.print("aaa"); Message="成功处理了"+RecordCount+"条数据"; } catch(FileNotFoundExceptione) { Message="错误,未找到指定文件"; ReturnCode=1; } catch(Exceptione) { ReturnCode=1; Message=e.toString(); e.printStackTrace(); //System.out.println("ReadExcelError"+e); } /* finally { //在无论出错还是不出错都执行的代码! Message="打开文件失败! "; //Message= ReturnCode=1; }*/ } publicdouble[]getReportValue() { //Message=String.valueOf(ReportValue[2]); returnReportValue; } publicStringgetMessage() { returnMessage; } publicintgetReturnCode() { returnReturnCode; } } poi就可以实现呀 word ============================ byteby[]=word文件内容; java.io.ByteArrayInputStreambis=newjava.io.ByteArrayInputStream(by); /* org.apache.poi.hdf.extractor.WordDocumentwd=neworg.apache.poi.hdf.extractor.WordDocument(bis); java.io.StringWriterdocTextWriter=newjava.io.StringWriter(); wd.writeAllText(newjava.io.PrintWriter(docTextWriter)); docTextWriter.close(); StringbodyText=docTextWriter.toString(); //bodyText=newWordExtractor().extractText(is); */ //FileInputStreamin=newFileInputStream("c: \\a.doc"); Stringcontent; try { org.apache.poi.hwpf.extractor.WordExtractorextractor=neworg.apache.poi.hwpf.extractor.WordExtractor(bis); content=extractor.getText(); }catch(java.io.IOExceptione) { res
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- java 操作 wordexcel