import org.apache.lucene.document.Document; import org.apache.lucene.document.Field;
import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFCell;
import java.io.File; import java.io.InputStream; import java.io.FileInputStream;
import com.search.code.Index;
public Document getDocument(Index index, String url, String title, InputStream is) throws DocCenterException ...{ StringBuffer content =new StringBuffer(); try...{ HSSFWorkbook workbook = new HSSFWorkbook(is);//创建对Excel工作簿文件的引用 for (int numSheets =0; numSheets < workbook.getNumberOfSheets(); numSheets++) ...{ if (null!= workbook.getSheetAt(numSheets)) ...{ HSSFSheet aSheet = workbook.getSheetAt(numSheets);//获得一个sheet for (int rowNumOfSheet =0; rowNumOfSheet <= aSheet.getLastRowNum(); rowNumOfSheet++) ...{ if (null!= aSheet.getRow(rowNumOfSheet)) ...{ HSSFRow aRow = aSheet.getRow(rowNumOfSheet); //获得一个行 for (short cellNumOfRow =0; cellNumOfRow <= aRow.getLastCellNum(); cellNumOfRow++) ...{ if (null!= aRow.getCell(cellNumOfRow)) ...{ HSSFCell aCell = aRow.getCell(cellNumOfRow);//获得列值 content.append(aCell.getStringCellValue()); } } } } } } if(!content.equals(""))...{ index.AddIndex(url, title, content.toString()); } }catch (DocCenterException e) ...{
thrownew DocCenterException("无法从该Mocriosoft Word文档中提取内容", e); }catch(Exception e) ...{ System.out.println("已运行xlRead() : " + e ); } returnnull; } |