Index: pom.xml
===================================================================
--- pom.xml	(revision 727)
+++ pom.xml	(working copy)
@@ -31,12 +31,12 @@
     <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi</artifactId>
-      <version>3.1-beta2</version>
+      <version>3.2-FINAL</version>
     </dependency>
     <dependency>
       <groupId>org.apache.poi</groupId>
       <artifactId>poi-scratchpad</artifactId>
-      <version>3.1-beta2</version>
+      <version>3.2-FINAL</version>
     </dependency>
     <dependency>
       <groupId>junit</groupId>
Index: src/main/java/org/jboss/dna/sequencer/msoffice/excel/ExcelMetadataReader.java
===================================================================
--- src/main/java/org/jboss/dna/sequencer/msoffice/excel/ExcelMetadataReader.java	(revision 727)
+++ src/main/java/org/jboss/dna/sequencer/msoffice/excel/ExcelMetadataReader.java	(working copy)
@@ -27,7 +27,11 @@
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.List;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
+
+import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFComment;
+import org.apache.poi.hssf.usermodel.HSSFRow;
+import org.apache.poi.hssf.usermodel.HSSFSheet;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
@@ -37,18 +41,104 @@
  */
 public class ExcelMetadataReader {
 
+    /** The character to output after each row. */
+    private static final char ROW_DELIMITER_CHAR = '\n';
+    /** The character to output after each cell (column). */
+    private static final char CELL_DELIMITER_CHAR = '\t';
+
+    /**
+     * Appends the text of a single cell, followed by its comment (if any), to the supplied buffer.
+     * String, numeric and formula cells contribute text; other cell types contribute only their comment.
+     *
+     * @param buff the buffer collecting the workbook's body text
+     * @param cell the cell to read; must not be null
+     */
+    private static void appendCell( StringBuffer buff, HSSFCell cell ) {
+        switch (cell.getCellType()) {
+            case HSSFCell.CELL_TYPE_STRING:
+                buff.append(cell.getRichStringCellValue().getString());
+                break;
+            case HSSFCell.CELL_TYPE_NUMERIC:
+                buff.append(cell.getNumericCellValue());
+                break;
+            case HSSFCell.CELL_TYPE_FORMULA:
+                buff.append(cell.getCellFormula());
+                break;
+        }
+
+        HSSFComment comment = cell.getCellComment();
+        if (comment != null) {
+            // Filter out row delimiter characters from comment
+            String commentText = comment.getString().getString().replace(ROW_DELIMITER_CHAR, ' ');
+
+            buff.append(" [");
+            buff.append(commentText);
+            buff.append(" by ");
+            buff.append(comment.getAuthor());
+            buff.append(']');
+        }
+    }
+
+    /**
+     * Reads an Excel workbook from the stream and collects its sheet names and body text.
+     * <p>
+     * The text of all worksheets is combined into one string: cells are separated by tabs and each row
+     * that has cells ends with a newline. Formula cells contribute the formula text, not a computed value.
+     *
+     * @param stream the stream containing the workbook
+     * @return the metadata holding the extracted text and the sheet names
+     * @throws IOException if the workbook cannot be read from the stream
+     */
     public static ExcelMetadata instance( InputStream stream ) throws IOException {
         ExcelMetadata metadata = new ExcelMetadata();
         HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream));
-        ExcelExtractor extractor = new ExcelExtractor(wb);
-        extractor.setFormulasNotResults(true);
-        extractor.setIncludeSheetNames(false);
-        metadata.setText(extractor.getText());
 
+        StringBuffer buff = new StringBuffer();
         List sheets = new ArrayList();
-        for (int i = 0; i < wb.getNumberOfSheets(); i++) {
-            sheets.add(wb.getSheetName(i));
+
+        for (int sheetInd = 0; sheetInd < wb.getNumberOfSheets(); sheetInd++) {
+            sheets.add(wb.getSheetName(sheetInd));
+
+            HSSFSheet worksheet = wb.getSheetAt(sheetInd);
+            int lastRowNum = worksheet.getLastRowNum();
+
+            for (int rowNum = worksheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) {
+                HSSFRow row = worksheet.getRow(rowNum);
+
+                // Empty rows are returned as null
+                if (row == null) {
+                    continue;
+                }
+
+                int lastCellNum = row.getLastCellNum();
+                for (int cellNum = row.getFirstCellNum(); cellNum < lastCellNum; cellNum++) {
+                    HSSFCell cell = row.getCell(cellNum);
+
+                    /*
+                     * Builds a string of body content from all string, numeric,
+                     * and formula values in the body of each worksheet.
+                     *
+                     * This code currently duplicates the POI 3.1 ExcelExtractor behavior of
+                     * combining the body text from all worksheets into a single string.
+                     *
+                     * Cells that were never defined are returned as null; they add no text,
+                     * but still receive a delimiter so the column positions are preserved.
+                     */
+                    if (cell != null) {
+                        appendCell(buff, cell);
+                    }
+
+                    if (cellNum < lastCellNum - 1) {
+                        buff.append(CELL_DELIMITER_CHAR);
+                    }
+                    else {
+                        buff.append(ROW_DELIMITER_CHAR);
+                    }
+                }
+            }
         }
+
+        metadata.setText(buff.toString());
         metadata.setSheets(sheets);
         return metadata;
     }