public class DataframeInDC {
private List<String> m_ColNames = new ArrayList<String>();
private Map<String, ArrayList<Object>> m_ColValues = new HashMap<String, ArrayList<Object>>();
private Map<Integer, String> m_IndexColNames = new HashMap<Integer, String>();
private Map<String, Integer> m_ColNameIndexes = new HashMap<String, Integer>();
private int m_RowsCapacity = -1;
public DataframeInDC() {
super();
}
public DataframeInDC(int nRowsCapacity) {
super();
m_RowsCapacity = nRowsCapacity;
}
public void initBlankCols() throws Exception {
ArrayList<Object> v1 = null;
for (int i = 0; i < m_ColNames.size(); i++) {
// System.out.println("i:"+i);
String colName = m_ColNames.get(i);
ArrayList<Object> v = null;
if (m_RowsCapacity == -1) {
v = new ArrayList<Object>();
} else {
v = new ArrayList<Object>(m_RowsCapacity);
}
if (i == 0) {
v1 = v;
}
updateColValue(colName, v);
if (v1 == v && i > 0)
System.out.println("v.hashCode():" + v.hashCode()
+ ": v1==v is " + (v1 == v));
}
}
public void initBlankCols(int nCapacity) throws Exception {
ArrayList<Object> v1 = null;
for (int i = 0; i < m_ColNames.size(); i++) {
// System.out.println("i:"+i);
String colName = m_ColNames.get(i);
ArrayList<Object> v = new ArrayList<Object>(nCapacity);
if (i == 0) {
v1 = v;
}
updateColValue(colName, v);
if (v1 == v && i > 0)
System.out.println("v.hashCode():" + v.hashCode()
+ ": v1==v is " + (v1 == v));
}
}
public void addCol(String colName, ArrayList<Object> colValues) throws Exception {
String sColName = colName.trim();
if (m_ColNameIndexes.containsKey(sColName)) {
throw new Exception("数据列[" + sColName + "]存在");
}
int nColIndex = m_ColNames.size();
m_ColNames.add(colName);
m_ColValues.put(sColName, colValues);
m_IndexColNames.put(nColIndex, colName);
m_ColNameIndexes.put(colName, nColIndex);
}
public void updateColValue(String colName, ArrayList<Object> colValue)
throws Exception {
String sColName = colName.trim();
if (!m_ColNameIndexes.containsKey(sColName)) {
throw new Exception("数据列[" + sColName + "]不存在");
}
m_ColValues.put(sColName, colValue);
}
/**
* 获取一个行
*
* @param rowIndex
* @return
* @throws Exception
*/
public String[] getRow(int rowIndex) throws Exception {
try {
String[] rowValues = new String[m_ColNames.size()];
for (int i = 0; i < this.getColCounts(); i++) {
ArrayList<Object> v = getColValue(i);
if (v != null) {
if (v.size() <= rowIndex) {
throw new Exception("列[" + getColNameAt(i) + "]长度不够["
+ v.size() + "].");
}
String rowValue = v.get(rowIndex).toString();
rowValues[i] = rowValue;
}
}
return rowValues;
} catch (Exception ex) {
throw ex;
}
}
/**
* 更新指定位置的值
*
* @param rowIndex
* @param colIndex
* @param value
* @throws Exception
*/
public void updateCellValue(int rowIndex, int colIndex, String value)
throws Exception {
ArrayList<Object> v = getColValue(colIndex);
v.set(rowIndex, value);
}
/**
* 追加一个行
*
* @param values
* @throws Exception
*/
public void addRow(String[] values) throws Exception {
if (values.length != m_ColNames.size()) {
throw new Exception("行值的数量和集合的列不匹配");
}
for (int i = 0; i < values.length; i++) {
ArrayList<Object> v = getColValue(i);
String value = values[i];
if (value == null)
value = "";
v.add(value);
}
}
/**
* 更新一个行的值
*
* @param values
* @param rowIndex
* @throws Exception
*/
public void updateRow(String[] values, int rowIndex) throws Exception {
if (values.length != m_ColNames.size()) {
throw new Exception("行值的数量和集合的列不匹配");
}
for (int i = 0; i < values.length; i++) {
String value = values[i];
if (value == null)
value = "";
ArrayList<Object> v = getColValue(i);
v.set(rowIndex, value);
}
}
/**
* 在当前的数据集中附加一个新的数据集
*
* @param dataframe
* @throws Exception
*/
public void append(DataframeInDC dataframe) throws Exception {
if (m_ColNames.size() != dataframe.getColNames().size()) {
throw new Exception("两个集合的列数目不一致");
}
int nColCount = this.getColCounts();
for (int i = 0; i < nColCount; i++) {
String sColName = this.getColNameAt(i);
ArrayList<Object> colValue = m_ColValues.get(sColName);
ArrayList<Object> colValue1 = dataframe.getColValue(sColName);
colValue.addAll(colValue1);
}
}
public int getColCounts() {
return m_ColNames.size();
}
public int getRowCounts() {
int nRowCounts = 0;
for (int i = 0; i < m_ColNames.size(); i++) {
ArrayList<Object> o = getColValue(i);
if (o == null) {
new Exception("列[" + m_ColNames.get(i) + "]的值为null.")
.printStackTrace();
}
if (o.size() > nRowCounts) {
nRowCounts = o.size();
}
}
return nRowCounts;
}
public DataframeInDC selectRows(int startRowIndex, int endRowIndex)
throws Exception {
DataframeInDC dataframe = new DataframeInDC();
dataframe.setColNames(this.getColNames());
dataframe.initBlankCols();
for (int i = 0; i < this.getRowCounts(); i++) {
if (i >= startRowIndex && i < endRowIndex) {
String[] rowValue = this.getRow(i);
dataframe.addRow(rowValue);
}
}
return dataframe;
}
public DataframeInDC selectCols(String[] pColNames) throws Exception {
DataframeInDC dataframe = new DataframeInDC();
List<String> colNames = new ArrayList<String>();
Map<String, ArrayList<Object>> colValues = new HashMap<String, ArrayList<Object>>();
for (int i = 0; i < pColNames.length; i++) {
String sColName = pColNames[i].trim();
if (m_ColNameIndexes.containsKey(sColName)) {
colNames.add(sColName);
colValues.put(sColName, getColValue(sColName));
}
}
dataframe.setColNames(colNames);
dataframe.setColValues(colValues);
return dataframe;
}
public DataframeInDC selectCols(int startColIndex, int endColIndex)
throws Exception {
DataframeInDC dataframe = new DataframeInDC();
List<String> colNames = new ArrayList<String>();
Map<String, ArrayList<Object>> colValues = new HashMap<String, ArrayList<Object>>();
for (int i = startColIndex; i < this.getColCounts() && i < endColIndex; i++) {
String sColName = m_ColNames.get(i);
ArrayList<Object> colValue = m_ColValues.get(sColName);
colNames.add(sColName);
colValues.put(sColName, colValue);
}
dataframe.setColNames(colNames);
dataframe.setColValues(colValues);
return dataframe;
}
/**
* 获取值的列
*
* @param colName
* @param colValue
* @return
*/
public int getRowIndexOfFirst(String colName, String colValue) {
int nIndexOf = -1;
int nColIndex = getColIndex(colName);
for (int i = 0; i < getRowCounts(); i++) {
String s0 = getColValue(i, nColIndex);
if (s0.equalsIgnoreCase(colValue)) {
nIndexOf = i;
break;
}
}
return nIndexOf;
}
/**
* 根据指定列的值来获取子数据集
*
* @param pColNames
* @param pColValues
* @return
* @throws Exception
*/
public DataframeInDC selectChild(String[] pColNames, String[] pColValues)
throws Exception {
DataframeInDC dataframe0 = new DataframeInDC();
dataframe0.setColNames(AnalysisListUtils.cloneList(this.getColNames()));
dataframe0.initBlankCols();
int rowCounts = getRowCounts();
for (int i = 0; i < rowCounts; i++) {
boolean bIsSelected = true; // 默认选中,如果有一个值不同,那么就不选中
for (int j = 0; j < pColNames.length; j++) {
String colName = pColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (!colValue.trim().equals(pColValues[j])) {
bIsSelected = false;
continue;
}
}
if (bIsSelected) {
// 将当前行选中,并put到dataframeInDC中
String[] row = getRow(i);
dataframe0.addRow(row);
}
}
return dataframe0;
}
/**
* 根据指定列的值来获取子数据集
*
* @param pColNames
* @param pColValues
* @return
* @throws Exception
*/
public DataframeInDC selectChild(String[] pColNames, String[] pColValues,
String relate) throws Exception {
DataframeInDC dataframe0 = new DataframeInDC();
dataframe0.setColNames(AnalysisListUtils.cloneList(this.getColNames()));
dataframe0.initBlankCols();
int rowCounts = getRowCounts();
for (int i = 0; i < rowCounts; i++) {
boolean bIsSelected = true; // 默认选中,如果有一个值不同,那么就不选中
if (relate.equalsIgnoreCase("AND")) {
bIsSelected = true;
for (int j = 0; j < pColNames.length; j++) {
String colName = pColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (!colValue.trim().equals(pColValues[j])) {
bIsSelected = false;
continue;
}
}
} else {
bIsSelected = false;
for (int j = 0; j < pColNames.length; j++) {
String colName = pColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (colValue.trim().equals(pColValues[j])) {
bIsSelected = true;
continue;
}
}
}
if (bIsSelected) {
// 将当前行选中,并put到dataframeInDC中
String[] row = getRow(i);
dataframe0.addRow(row);
}
}
return dataframe0;
}
/**
* 根据指定需要排除列的值来获取子数据集
*
* @param pExcludeColNames
* @param pExcludeColValues
* @return
* @throws Exception
*/
public DataframeInDC selectChildWithExclude(String[] pExcludeColNames,
String[] pExcludeColValues, String relate) throws Exception {
DataframeInDC dataframe0 = new DataframeInDC();
dataframe0.setColNames(AnalysisListUtils.cloneList(this.getColNames()));
dataframe0.initBlankCols();
int rowCounts = getRowCounts();
StringBuffer oStringBuffer = new StringBuffer();
for (int i = 0; i < rowCounts; i++) {
boolean bIsExclude = true; // 默认为排除,如果有一个值不相同,那么不排除
if (relate.equalsIgnoreCase("AND")) {
bIsExclude = true;
for (int j = 0; j < pExcludeColNames.length; j++) {
String colName = pExcludeColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (!colValue.trim().equals(pExcludeColValues[j])) { //
bIsExclude = false;
}
}
} else {
// 或关系,有一个值相同,就排除
bIsExclude = false;
for (int j = 0; j < pExcludeColNames.length; j++) {
String colName = pExcludeColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (colValue.trim().equals(pExcludeColValues[j])) { //
// System.out.println(colValue+":"+pExcludeColValues[j]);
bIsExclude = true;
break;
}
}
}
if (!bIsExclude) { // 如果当前行不需要排除
// 将当前行选中,并put到dataframeInDC中
String[] row = getRow(i);
dataframe0.addRow(row);
} else {
//
int nColIndex = getColIndex("号码");
oStringBuffer.append(getColValue(i, nColIndex)).append(",");
}
}
if (oStringBuffer.length() > 0) {
System.out.println("===移除了:" + oStringBuffer.toString());
}
return dataframe0;
}
/**
* 根据指定需要排除列的值来获取子数据集
*
* @param pExcludeColNames
* @param pExcludeColValues
* @return
* @throws Exception
*/
public DataframeInDC selectChildWithExclude(String[] pExcludeColNames,
String[] pExcludeColValues, String relate, String uniqueKey,
Map<String, String> hmExcludeCounts) throws Exception {
DataframeInDC dataframe0 = new DataframeInDC();
dataframe0.setColNames(AnalysisListUtils.cloneList(this.getColNames()));
dataframe0.initBlankCols();
int rowCounts = getRowCounts();
System.out.println("rowCounts:" + rowCounts);
StringBuffer oStringBuffer = new StringBuffer();
int nExcludeCounts = 0;
for (int i = 0; i < rowCounts; i++) {
boolean bIsExclude = true; // 默认为排除,如果有一个值不相同,那么不排除
if (relate.equalsIgnoreCase("AND")) {
bIsExclude = true;
for (int j = 0; j < pExcludeColNames.length; j++) {
String colName = pExcludeColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (!colValue.trim().equals(pExcludeColValues[j])) { //
bIsExclude = false;
}
}
} else {
// 或关系,有一个值相同,就排除
bIsExclude = false;
for (int j = 0; j < pExcludeColNames.length; j++) {
String colName = pExcludeColNames[j].trim();
int colIndex = getColIndex(colName);
String colValue = getColValue(i, colIndex);
if (colValue.trim().equals(pExcludeColValues[j])) { //
// System.out.println(colValue+":"+pExcludeColValues[j]);
bIsExclude = true;
break;
}
}
}
if (!bIsExclude) { // 如果当前行不需要排除
// 将当前行选中,并put到dataframeInDC中
String[] row = getRow(i);
dataframe0.addRow(row);
// System.out.println(i+":dataframe0.getRowCounts():"+dataframe0.getRowCounts());
} else {
//
int nColIndex = getColIndex(uniqueKey);
oStringBuffer.append(getColValue(i, nColIndex)).append(",");
nExcludeCounts++;
}
}
String sName = StringUtil.join(pExcludeColNames, ",");
String sValue = StringUtil.join(pExcludeColValues, ",");
hmExcludeCounts
.put(sName + "=" + sValue, String.valueOf(nExcludeCounts) + "/"
+ oStringBuffer.toString());
// System.out.println("dataframe0.getRowCounts():"+dataframe0.getRowCounts());
return dataframe0;
}
@Override
@SuppressWarnings("all")
public DataframeInDC clone() {
try {
DataframeInDC dataframe = new DataframeInDC();
List<String> colNames = new ArrayList<String>();
Map<String, ArrayList<Object>> colValues = new HashMap<String, ArrayList<Object>>();
for (int i = 0; i < this.getColCounts(); i++) {
String sColName = m_ColNames.get(i);
ArrayList<Object> colValue = m_ColValues.get(sColName);
colNames.add(sColName);
colValues.put(sColName, (ArrayList<Object>) colValue.clone());
}
dataframe.setColNames(colNames);
dataframe.setColValues(colValues);
return dataframe;
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
public String getColNameAt(int colIndex) {
String sColName = m_IndexColNames.get(colIndex);
return sColName;
}
public ArrayList<Object> getColValue(int colIndex) {
String sColName = m_ColNames.get(colIndex);
return getColValue(sColName);
}
public String getColValue(int rowIndex, int colIndex) {
ArrayList<Object> o = getColValue(colIndex);
String sColValue = "";
if (o.size() > rowIndex) {
Object oValue = o.get(rowIndex);
if (oValue instanceof String) {
sColValue = (String) oValue;
} else {
sColValue = o.get(rowIndex).toString();
}
}
return sColValue;
}
public String getColValue(int rowIndex, String colName) {
ArrayList<Object> o = getColValue(colName);
String sColValue = "";
if (o.size() > rowIndex) {
Object oValue = o.get(rowIndex);
if (oValue instanceof String) {
sColValue = (String) oValue;
} else {
sColValue = o.get(rowIndex).toString();
}
}
return sColValue;
}
public ArrayList<Object> getColValue(String colName) {
if (colName == null) {
System.out.println("Why?");
}
String sColName = colName.trim();
return m_ColValues.get(sColName);
}
/**
* 获取列的序号
*
* @param colName
* @return 列不存在返回-1
*/
public int getColIndex(String colName) {
int nIndex = -1;
try {
nIndex = m_ColNameIndexes.get(colName);
if (nIndex < 0)
nIndex = -1;
} catch (Exception ex) {
}
return nIndex;
}
public void setColNames(List<String> colNames) throws Exception {
m_ColNames = colNames;
for (int i = 0; i < colNames.size(); i++) {
if (m_ColNameIndexes.containsKey(colNames.get(i))) {
throw new Exception("数据集不允许出现重名字段[" + colNames.get(i) + "].");
}
m_IndexColNames.put(i, m_ColNames.get(i));
m_ColNameIndexes.put(m_ColNames.get(i), i);
}
}
public void setColValues(Map<String, ArrayList<Object>> colValues) {
m_ColValues = colValues;
}
public List<String> getColNames() {
return m_ColNames;
}
public Map<String, ArrayList<Object>> getColValues() {
return m_ColValues;
}
public DataframeInDC removeColumns(String[] excludeColumns)
throws Exception {
if (excludeColumns == null || excludeColumns.length == 0) {
return this;
}
DataframeInDC dataframe0 = new DataframeInDC();
for (int i = 0; i < getColCounts(); i++) {
String colName = getColNameAt(i);
if (StringUtil.isInArray(colName, excludeColumns, false)) {
continue;
}
dataframe0.addCol(colName, getColValue(i));
}
return dataframe0;
}
/**
* 对当前数据集添加另一个数据集,要求两个数据集的列名称保持一致性
*
* @param dataframeInDC
* @param pUniqueColName
* @throws Exception
*/
public void merge(DataframeInDC dataframeInDC, String pUniqueColName)
throws Exception {
int colCounts = dataframeInDC.getColCounts();
if (pUniqueColName == null || pUniqueColName.isEmpty()) {
for (int i = 0; i < colCounts; i++) {
String colname = dataframeInDC.getColNameAt(i);
List<Object> colValue0 = getColValue(colname);
List<Object> colValue1 = dataframeInDC.getColValue(i);
colValue0.addAll(colValue1);
}
} else {
List<Object> uniqueColValues = getColValue(pUniqueColName);
int uniqueColIndex = dataframeInDC.getColIndex(pUniqueColName);
for (int i = 0; i < dataframeInDC.getRowCounts(); i++) {
String uniqueColValue = dataframeInDC.getColValue(i,
uniqueColIndex);
if (uniqueColValues.indexOf(uniqueColValue) != -1) {
continue;
}
for (int j = 0; j < dataframeInDC.getColCounts(); j++) {
String colName = dataframeInDC.getColNameAt(j);
String colValue = dataframeInDC.getColValue(i, j);
List<Object> colValues = getColValue(colName);
colValues.add(colValue);
}
}
}
}
public class DataframeInDCUtils {
private DataframeInDC m_Dataframe = null;
private Map<String, Map<String, Integer>> m_GroupedValue = new HashMap<String, Map<String, Integer>>();
private Map<String, Map<String, String>> m_DigitizationInfo = new HashMap<String, Map<String, String>>();
// private Map<String, Double> m_SumsValue = new HashMap<String, Double>();
private int m_RowCounts = 0;
public DataframeInDCUtils(DataframeInDC dataframe) {
super();
m_Dataframe = dataframe;
}
/**
* 获取某一行数据
*
* @param dc
* 数据框架
* @param rowIndex
* 行索引
* @return
*/
public static Map<String, Object> getRowMap(DataframeInDC dc, int rowIndex) {
Map<String, Object> dataMap = new LinkedHashMap<String, Object>();
try {
List<String> columnNameList = dc.getColNames();
String[] row = dc.getRow(rowIndex);
if (row != null && columnNameList.size() == row.length) {
for (int i = 0; i < columnNameList.size(); i++) {
String columnName = StringUtil.getStr(columnNameList.get(i));
String value = StringUtil.getStr(row[i]);
dataMap.put(columnName, value);
}
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return dataMap;
}
public int counts(String colName) {
ArrayList<Object> o = m_Dataframe.getColValue(colName);
if (o == null)
return 0;
return o.size();
}
public int counts(String colName, String colValue) {
ArrayList<Object> colValues = m_Dataframe.getColValue(colName);
if (colValues == null) {
return 0;
}
int counts = 0;
String colValue0 = colValue.trim();
for (int i = 0; i < colValues.size(); i++) {
String s = String.valueOf(colValues.get(i));
if (s.equals(colValue0)) {
counts++;
}
}
return counts;
}
public int counts(String yColName, String yColValue, String[] xColNames, String[] xColValues) throws Exception {
if (xColNames.length != xColValues.length) {
throw new Exception("变量长度不一致");
}
String[] selColNames = new String[xColNames.length + 1];
String[] selColValues = new String[xColValues.length + 1];
System.arraycopy(xColNames, 0, selColNames, 0, xColNames.length);
System.arraycopy(xColValues, 0, selColValues, 0, xColValues.length);
selColNames[selColNames.length - 1] = yColName;
selColValues[selColValues.length - 1] = yColValue;
// 1.开始扫描行,根据行内容来判定是否符合选择
// long l = System.currentTimeMillis();
int counts = 0;
if (m_RowCounts == 0) {
m_RowCounts = m_Dataframe.getRowCounts();
}
int rowCounts = m_RowCounts;
for (int rowIndex = 0; rowIndex < rowCounts; rowIndex++) {
boolean bIsEquals = true;
for (int nIndex = 0; nIndex < selColNames.length; nIndex++) {
String sColName = selColNames[nIndex];
if (sColName.equals("")) {
continue;
}
String sColValue = selColValues[nIndex];
String sColValue0 = m_Dataframe.getColValue(rowIndex, sColName);
bIsEquals = bIsEquals && sColValue.equals(sColValue0);
}
if (bIsEquals) {
counts = counts + 1;
}
}
return counts;
}
public String[] getDistinctValues(String colName) {
List<Object> distinctValues = new ArrayList<Object>();
ArrayList<Object> o = m_Dataframe.getColValue(colName);
if (o == null) {
return new String[] {};
}
Map<String, String> hmValues = new HashMap<String, String>();
for (int i = 0; i < o.size(); i++) {
String s = String.valueOf(o.get(i));
s = s.trim();
if (hmValues.containsKey(s)) {
continue;
}
distinctValues.add(s);
hmValues.put(s, s);
}
String[] arDistinctValues = new String[distinctValues.size()];
distinctValues.toArray(arDistinctValues);
return arDistinctValues;
}
/**
* 获取分组后数量最多的值
*
* @param colName
* @return
*/
public String[] getMaxGroupedNumColValue(String colName) {
// m_GroupedValue
Map<String, Integer> hmGroupedValue = null;
if (m_GroupedValue.containsKey(colName)) {
// 如果已经缓存,从缓存中获取值
hmGroupedValue = m_GroupedValue.get(colName);
} else {
// 计算列的distinct value的数量
Map<String, Integer> hmValues = new HashMap<String, Integer>();
String[] distinctValues = getDistinctValues(colName);
for (int i = 0; i < distinctValues.length; i++) {
String s = distinctValues[i];
int ncounts = counts(colName, s);
hmValues.put(s, ncounts);
}
m_GroupedValue.put(colName, hmValues);
hmGroupedValue = hmValues;
}
// 从这里面选择最大的
Iterator<Map.Entry<String, Integer>> entrys = hmGroupedValue.entrySet().iterator();
int maxCounts = 0;
String sColValue = "";
while (entrys.hasNext()) {
Map.Entry<String, Integer> entry = entrys.next();
String s = entry.getKey();
int nCounts = entry.getValue();
if (nCounts > maxCounts) {
maxCounts = nCounts;
sColValue = s;
}
}
return new String[] { sColValue, String.valueOf(maxCounts) };
}
/**
* 联合行
*
* <p>
* 格式为:Xi1=XVi1;Xi2=XVi2 其中;为colDelims
* </p>
*
* @param rowIndex
* @return
*/
public String combineRow(int rowIndex, char colDelims) {
int nColCounts = m_Dataframe.getColCounts();
StringBuffer oStringBuffer = new StringBuffer();
for (int colIndex = 0; colIndex < nColCounts; colIndex++) {
if (colIndex > 0) {
oStringBuffer.append(colDelims);
}
String colName = m_Dataframe.getColNameAt(colIndex);
String colValue = m_Dataframe.getColValue(rowIndex, colIndex);
oStringBuffer.append(colName).append("=").append(colValue);
}
return oStringBuffer.toString();
}
/**
* 仅在选中的列中联合行
*
* @param pColNames
* @param rowIndex
* @param colDelims
* @return
*/
public String combineRow(List<String> pColNames, int rowIndex, char colDelims) {
// long lStartTime = System.currentTimeMillis();
StringBuffer oStringBuffer = new StringBuffer(2048);
int counts = 0;
for (int i = 0; i < pColNames.size(); i++) {
String colName = pColNames.get(i);
if (colName == null || colName.trim().equals("")) {
continue;
}
int colIndex = m_Dataframe.getColIndex(colName);
if (counts > 0) {
oStringBuffer.append(colDelims);
}
String colValue = m_Dataframe.getColValue(rowIndex, colIndex);
oStringBuffer.append(colName).append("=").append(colValue);
counts++;
}
// System.out.println("combine use
// time:"+(System.currentTimeMillis()-lStartTime));
return oStringBuffer.toString();
}
/**
* 解除行的联合.
*
* <p>
* ArrayList 返回值,第一个返回的是ColNames,第二个返回的是ColValues
* </p>
*
* @param sCombined
* @param colDelims
* @return
*/
public ArrayList<Object> uncombineRow(String sCombined, char colDelims) {
List<String> colNames = new ArrayList<String>();
List<String> colValues = new ArrayList<String>();
String[] colParts = StringUtil.split(sCombined, colDelims, false);
for (int i = 0; i < colParts.length; i++) {
String col0 = colParts[i];
int nIndexOf = col0.indexOf("=");
String colName = col0.substring(0, nIndexOf);
String colValue = col0.substring(nIndexOf + 1, col0.length());
colNames.add(colName);
colValues.add(colValue);
}
ArrayList<Object> v = new ArrayList<Object>();
v.add(colNames);
v.add(colValues);
return v;
}
/**
* 将数据集合输出到Excel
*
* @param sFile
* @throws Exception
*/
public void writeIntoXslx(String sFile, String sheetName, int titleRows, String titleRowSep) throws Exception {
FileOutputStream fOuts = null;
try {
File fFile = new File(sFile);
if (!fFile.getParentFile().exists()) {
fFile.getParentFile().mkdirs();
}
XSSFWorkbook wb = null;
wb = new XSSFWorkbook(); // 创建Excel工作薄对象
fOuts = new FileOutputStream(sFile);
if (sheetName == null || sheetName.equals(""))
sheetName = "工作表0";
XSSFSheet sheet = wb.createSheet(sheetName);
// 添加标题行
if (true) {
XSSFCellStyle cellStyle = wb.createCellStyle();
Font ztFont = wb.createFont();
ztFont.setFontHeightInPoints((short) 16);
cellStyle.setFont(ztFont);
Map<Integer, XSSFRow> rowsMap = new HashMap<Integer, XSSFRow>();
for (int i = 0; i < m_Dataframe.getColNames().size(); i++) {
String colName = m_Dataframe.getColNameAt(i);
String[] colNames = new String[] { colName };
if (titleRows > 1) {
colNames = StringUtil.split(colName, titleRowSep);
}
for (int j = 0; j < titleRows; j++) {
XSSFRow row = rowsMap.get(j);
if (rowsMap.get(j) == null) {
row = sheet.createRow(j);
rowsMap.put(j, row);
}
XSSFCell ztCell = row.createCell(i);
ztCell.setCellStyle(cellStyle);
if (colNames.length > j) {
ztCell.setCellValue(colNames[j]);
} else {
ztCell.setCellValue("");
}
}
}
}
// 添加内容行
if (true) {
int rowcounts = m_Dataframe.getRowCounts();
XSSFCellStyle cellStyle = wb.createCellStyle();
Font ztFont = wb.createFont();
ztFont.setFontHeightInPoints((short) 14);
cellStyle.setFont(ztFont);
for (int i = 0; i < rowcounts; i++) {
XSSFRow row = sheet.createRow(i + titleRows); // 注意标题行,1为标题行的数量
// 将列的值写入
int colcounts = m_Dataframe.getColCounts();
for (int j = 0; j < colcounts; j++) {
String colValue = m_Dataframe.getColValue(i, j);
XSSFCell ztCell = row.createCell(j);
ztCell.setCellValue(colValue);
ztCell.setCellStyle(cellStyle);
}
}
}
wb.write(fOuts);
fOuts.flush();
} catch (Exception ex) {
ex.printStackTrace();
} finally {
try {
fOuts.close();
} catch (Exception ex) {
}
}
}
/**
* 将数据集写入到文件中
*
* @param sFile
* @param fieldDelims
* @param nFileType
* @throws Exception
*/
public void writeIntoFile(String sFile, char fieldDelims, int nFileType) throws Exception {
if (nFileType != 1) {
throw new Exception("暂时不支持除tsv格式之外的数据.");
}
FileOutputStream fOuts = null;
BufferedWriter writer = null;
try {
File parentFile = new File(sFile).getParentFile();
if (!parentFile.exists()) {
parentFile.mkdirs();
}
fOuts = new FileOutputStream(sFile);
writer = new BufferedWriter(new OutputStreamWriter(fOuts, "UTF-8"));
String lineSep = System.getProperty("line.separator");
// 1.输出头信息
List<String> colNames = m_Dataframe.getColNames();
StringBuffer oStringBuffer = new StringBuffer();
for (int i = 0; i < colNames.size(); i++) {
if (i > 0)
oStringBuffer.append(fieldDelims);
oStringBuffer.append(colNames.get(i));
}
writer.write(oStringBuffer.toString() + lineSep);
// 2.输出行数据
int rowCounts = m_Dataframe.getRowCounts();
int colCounts = m_Dataframe.getColCounts();
for (int rowIndex = 0; rowIndex < rowCounts; rowIndex++) {
oStringBuffer.delete(0, oStringBuffer.length());
for (int colIndex = 0; colIndex < colCounts; colIndex++) {
if (colIndex > 0) {
oStringBuffer.append(fieldDelims);
}
String value = m_Dataframe.getColValue(rowIndex, colIndex);
if (value == null) {
value = "";
}
oStringBuffer.append(value);
}
writer.write(oStringBuffer.toString() + lineSep);
}
writer.flush();
} catch (Exception ex) {
ex.printStackTrace();
throw ex;
} finally {
CloseableUtils.closeQuietly(fOuts);
CloseableUtils.closeQuietly(writer);
}
}
/**
* 从文件中读取数据集
*
* @param inputFile
* @param fieldDelims
* @param nFileType
* @return
* @throws Exception
*/
public static DataframeInDC readFromFile(String inputFile, char fieldDelims, int nFileType) throws Exception {
return readFromFile(inputFile, -1, fieldDelims, nFileType);
}
/**
* 从文件中读取数据集,限定行数
*
* @param inputFile
* @param lMaxLines
* @param fieldDelims
* @param nFileType
* @return
* @throws Exception
*/
public static DataframeInDC readFromFile(String inputFile, long lMaxLines, char fieldDelims, int nFileType)
throws Exception {
if (nFileType != 1) {
throw new Exception("暂时不支持除tsv格式之外的数据.");
}
FileInputStream fIns = null;
BufferedReader reader = null;
try {
DataframeInDC dataframe = new DataframeInDC();
fIns = new FileInputStream(inputFile);
reader = new BufferedReader(new InputStreamReader(fIns, "UTF-8"));
String line;
int lineCount = 0;
while ((line = reader.readLine()) != null) {
if (lMaxLines != -1 && lineCount >= lMaxLines) {
break;
}
if (line.trim().length() == 0) {
continue;
}
System.out.println("lineCount:" + lineCount);
if (lineCount == 0) {
String[] lineParts = StringUtil.split(line, fieldDelims, false);
List<String> colNames = new ArrayList<String>(lineParts.length);
Map<String, String> hmColNames = new HashMap<String, String>();
for (int i = 0; i < lineParts.length; i++) {
String colName = lineParts[i];
if (hmColNames.containsKey(colName)) {
colName = colName + "_#_" + i;
}
colNames.add(colName);
hmColNames.put(lineParts[i], lineParts[i]);
}
dataframe.setColNames(colNames);
System.out.println("colNames.size():" + colNames.size());
dataframe.initBlankCols();
lineCount++;
continue;
}
String[] lineParts = StringUtil.split(line, fieldDelims, false);
dataframe.addRow(lineParts);
lineCount++;
}
return dataframe;
} catch (Exception ex) {
ex.printStackTrace();
throw ex;
} finally {
CloseableUtils.closeQuietly(reader);
CloseableUtils.closeQuietly(fIns);
}
}
/**
* 对变量X列上面的值,根据Y列的值进行分组
*
* @param yColName
* @param xColName
* @return
*/
public Map<String, Map<String, Integer>> groupWithY(String yColName, String xColName) throws Exception {
String[] yDistinctValues = getDistinctValues(yColName);
String[] xDistinctValues = getDistinctValues(xColName);
Map<String, Map<String, Integer>> hmGrouped = new HashMap<String, Map<String, Integer>>();
for (int xvIndex = 0; xvIndex < xDistinctValues.length; xvIndex++) {
String xValue = xDistinctValues[xvIndex];
Map<String, Integer> hm0 = new HashMap<String, Integer>();
for (int yvIndex = 0; yvIndex < yDistinctValues.length; yvIndex++) {
String yValue = yDistinctValues[yvIndex];
int n = counts(yColName, yValue, new String[] { xColName }, new String[] { xValue });
hm0.put(yValue, n);
}
hmGrouped.put(xValue, hm0);
}
return hmGrouped;
}
/**
* 对数据集进行数字化
*
* @param pIncludeCols
* @param pExcludeCols
* @return
* @throws Exception
*/
public DataframeInDC digitization(String[] pIncludeCols, String[] pExcludeCols, int nStartValue,
boolean bKeepDigitColValue) throws Exception {
if (m_Dataframe.getRowCounts() == 0) {
// 返回原始的Dataframe对象
return m_Dataframe;
}
Map<String, Map<String, String>> hmDigitizationInfos = new HashMap<String, Map<String, String>>();
DataframeInDC dataframeDigited = new DataframeInDC();
List<String> colNames = m_Dataframe.getColNames();
dataframeDigited.setColNames(AnalysisListUtils.cloneList(colNames));
for (int i = 0; i < colNames.size(); i++) {
String colName = colNames.get(i);
if (pIncludeCols != null && pIncludeCols.length > 0) {
// colName必须在pIncludeCols中
if (!StringUtil.isInArray(colName, pIncludeCols, false)) {
dataframeDigited.updateColValue(colName, m_Dataframe.getColValue(colName));
continue;
}
}
if (pExcludeCols != null && pExcludeCols.length > 0) {
// colName必须不在pExcludeCols中
if (StringUtil.isInArray(colName, pExcludeCols, false)) {
dataframeDigited.updateColValue(colName, m_Dataframe.getColValue(colName));
continue;
}
}
// 开始对列进行数字化,获取列数字化的信息
Map<String, String> colDigizationInfo = getColDigitizationInfo(nStartValue, bKeepDigitColValue, colName);
// 开始对列进行转换
ArrayList<Object> vDigitedValue = new ArrayList<Object>();
ArrayList<Object> vColValue = m_Dataframe.getColValue(colName);
for (int j = 0; j < vColValue.size(); j++) {
String colValue = (String) vColValue.get(j);
colValue = colValue.trim();
String digitedValue = colDigizationInfo.get(colValue);
vDigitedValue.add(digitedValue);
}
// 保存列的数字化信息
hmDigitizationInfos.put(colName, colDigizationInfo);
dataframeDigited.updateColValue(colName, vDigitedValue);
}
m_DigitizationInfo = hmDigitizationInfos;
return dataframeDigited;
}
/**
* 获取指定列的数字化值
*
* @param nStartValue
* @param bKeepDigitColValue
* @param colName
* @return
* @throws Exception
*/
private Map<String, String> getColDigitizationInfo(int nStartValue, boolean bKeepDigitColValue, String colName)
throws Exception {
String[] arDistinctValues = getDistinctValues(colName);
Map<String, String> colDigizationInfo = new HashMap<String, String>();
for (int j = 0; j < arDistinctValues.length; j++) {
int nDigit = nStartValue + j;
String currValue = arDistinctValues[j];
double d = 0;
if (isDigit(currValue)) {
// 本身是数字,
if (bKeepDigitColValue) {
d = Double.parseDouble(currValue.trim());
} else {
d = nDigit;
}
} else {
d = nDigit;
}
colDigizationInfo.put(currValue, String.valueOf(d));
}
return colDigizationInfo;
}
/**
* 返回列值的第一个位置,没有找到,返回-1
*
* @param colName
* @param sValue
* @param isIgnoreCase
* @return
*/
public int indexOfColValue(String colName, String sValue, boolean isIgnoreCase) {
int nIndexOf = -1;
String sValue0 = sValue.trim();
ArrayList<Object> v0 = m_Dataframe.getColValue(colName);
if (v0 == null)
return -1;
for (int i = 0; i < v0.size(); i++) {
String sCurr = StringUtil.getStr(v0.get(i));
if (sCurr == null) {
continue;
}
sCurr = sCurr.trim();
if (isIgnoreCase && sCurr.equalsIgnoreCase(sValue0)) {
nIndexOf = i;
break;
}
if (!isIgnoreCase && sCurr.equals(sValue0)) {
nIndexOf = i;
break;
}
}
return nIndexOf;
}
/**
* 对数据集做矩阵转置 将行-列进行转换
*
* @param pColAsRowName
* @param sRowNameForColName
* @return
*/
public DataframeInDC transpose(String pColAsRowName, String sRowNameForColName) throws Exception {
DataframeInDC dataframe0 = new DataframeInDC();
// 处理转置后的数据集的ColNames
List<String> colNames0 = new ArrayList<String>(m_Dataframe.getRowCounts() + 1);
int indexColValaueAsRowName = m_Dataframe.getColIndex(pColAsRowName);
colNames0.add(sRowNameForColName);
for (int i = 0; i < m_Dataframe.getRowCounts(); i++) {
// System.out.println("i:"+i);
String colValue = m_Dataframe.getColValue(i, indexColValaueAsRowName);
if (colValue == null || colValue.trim().equals("")) {
colValue = "__Column__" + i;
} else {
colValue = colValue.trim();
}
colNames0.add(colValue);
}
dataframe0.setColNames(colNames0);
System.out.println("dataframe0.setColNames finished.");
dataframe0.initBlankCols(m_Dataframe.getColNames().size());
System.out.println("dataframe0.initBlankCols finished.");
// 生成数据拼接进去
// int nRowCount = m_Dataframe.getRowCounts();
for (int i = 0; i < m_Dataframe.getColNames().size(); i++) {
System.out.println("i:" + i + ";" + Runtime.getRuntime().freeMemory() / (1024 * 1024) + "Mb");
String colName = m_Dataframe.getColNameAt(i).trim();
if (colName.equalsIgnoreCase(pColAsRowName)) {
// 忽略掉作为新的表头的信息
continue;
}
// 将列加入进去
dataframe0.getColValue(sRowNameForColName).add(colName);
// 将其他的列加入进去
for (int j = 0; j < m_Dataframe.getRowCounts(); j++) {
List<Object> newColValues = dataframe0.getColValue(j + 1);
String colValue = m_Dataframe.getColValue(j, i);
newColValues.add(colValue);
}
}
return dataframe0;
}
/**
* 是否是数字
*
* @param sValue
* @return
* @throws Exception
*/
private boolean isDigit(String sValue) throws Exception {
String s = sValue.trim();
boolean bIsDigit = false;
try {
Double.parseDouble(s);
bIsDigit = true;
} catch (Exception ex) {
}
return bIsDigit;
}
/**
* 获取最近一次数字化的信息
*
* @return
* @throws Exception
*/
public Map<String, Map<String, String>> getDigitizationInfo() throws Exception {
if (m_DigitizationInfo.isEmpty()) {
throw new Exception("请先调用数据集合数字化方法.");
}
return m_DigitizationInfo;
}
/**
* 对指定列进行离散
*
* @param colName
* @param valueDispersedColName
* @param valueDispersed
* @return
*/
public DataframeInDC valueDispersed(String colName, String valueDispersedColName, IValueDispersed valueDispersed)
throws Exception {
List<Object> colValues = m_Dataframe.getColValue(colName);
ArrayList<Object> dispersedColValues = new ArrayList<Object>(colValues.size());
for (int i = 0; i < colValues.size(); i++) {
String colValue = StringUtil.getStr(colValues.get(i));
String sDispersedValue = valueDispersed.getDispersedValue(colValue);
dispersedColValues.add(sDispersedValue);
}
if (colName.equalsIgnoreCase(valueDispersedColName)) {
m_Dataframe.updateColValue(valueDispersedColName, dispersedColValues);
} else {
m_Dataframe.addCol(valueDispersedColName, dispersedColValues);
}
return m_Dataframe;
}
/**
* 联合两个数据集,大的数据集在左边
*
* @param dataframe0
* @param dataframe1
* @param unionColumn0
* @param unionColumn1
* @return
* @throws Exception
*/
public static DataframeInDC union(DataframeInDC dataframe0, DataframeInDC dataframe1, String unionColumn0,
String unionColumn1) throws Exception {
DataframeInDC _dataframe0 = dataframe0;
String _unionColumn0 = unionColumn0;
DataframeInDC _dataframe1 = dataframe1;
String _unionColumn1 = unionColumn1;
if (dataframe0.getColCounts() < dataframe1.getColCounts()) {
_dataframe0 = dataframe1;
_unionColumn0 = unionColumn1;
_dataframe1 = dataframe0;
_unionColumn1 = unionColumn0;
}
// 根据左边的集合中进行联合列的顺序,为右侧构造一个全新的Dataframe
int nUnionColIndex0 = _dataframe0.getColIndex(_unionColumn0);
DataframeInDC newDataframe = new DataframeInDC();
newDataframe.setColNames(_dataframe1.getColNames());
newDataframe.initBlankCols();
for (int i = 0; i < _dataframe0.getRowCounts(); i++) {
String sUnionValue = _dataframe0.getColValue(i, nUnionColIndex0);
int rowIndex = _dataframe1.getRowIndexOfFirst(_unionColumn1, sUnionValue);
String[] newRowValues = null;
if (rowIndex != -1) {
newRowValues = _dataframe1.getRow(rowIndex);
} else {
newRowValues = new String[_dataframe1.getColCounts()];
for (int j = 0; j < newRowValues.length; j++) {
newRowValues[j] = "";
}
}
newDataframe.addRow(newRowValues);
}
// 开始进行联合,现在newDataframe的行顺序和左侧一致了
DataframeInDC dataframe = _dataframe0;
for (int i = 0; i < newDataframe.getColCounts(); i++) {
String newColName = newDataframe.getColNameAt(i);
if (newColName.equalsIgnoreCase(_unionColumn1))
continue;
if (dataframe.getColIndex(newColName) != -1) {
continue;
}
ArrayList<Object> colValues = newDataframe.getColValue(i);
dataframe.addCol(newColName, colValues);
}
return dataframe;
}
/**
* 对行进行普通抽样。<br/>
* 保证抽样出的结果不重复,牺牲一些独立性
*
* @param pRetainColNames
* 保留的记录条件字段名
* @param pRetainColValues
* 保留的记录条件字段值
* @param pSamplingColNames
* 抽样的记录条件字段名
* @param pSamplingColValues
* 抽样的记录条件字段值
* @return
*/
public DataframeInDC samplingRows(int samplingCount, String[] pRetainColNames, String[] pRetainColValues,
String[] pSamplingColNames, String[] pSamplingColValues) throws Exception {
DataframeInDC dataframe0 = m_Dataframe.selectChild(pRetainColNames, pRetainColValues);
DataframeInDC dataframe1 = m_Dataframe.selectChild(pSamplingColNames, pSamplingColValues);
if (samplingCount > dataframe1.getRowCounts()) {
throw new Exception("需要抽样的记录数目太多.");
}
Map<Integer, Integer> sampledIndex = new HashMap<Integer, Integer>();
int n = 0;
int rowCount = dataframe1.getRowCounts();
System.out.println("rowCount:" + rowCount);
while (n < samplingCount) {
boolean bSampled = false;
while (!bSampled) {
int nRandom = new Random(System.currentTimeMillis() + n * 1000).nextInt() % rowCount;
if (nRandom < 0)
nRandom = 0 - nRandom;
System.out.println(nRandom + ":" + sampledIndex.containsKey(nRandom));
if (!sampledIndex.containsKey(nRandom)) {
bSampled = true;
sampledIndex.put(nRandom, nRandom);
}
}
n++;
}
Iterator<Integer> keyIter = sampledIndex.keySet().iterator();
while (keyIter.hasNext()) {
int rowId = keyIter.next();
String[] row = dataframe1.getRow(rowId);
dataframe0.addRow(row);
}
return dataframe0;
}
/**
* 根据字段名和字段值获取行索引
*
* @param colNames
* @param colValues
* @return
* @throws Exception
*/
public int findRowIndex(String[] colNames, String[] colValues) throws Exception {
int[] colNameIndexes = new int[colNames.length];
for (int i = 0; i < colNames.length; i++) {
int colNameIndex = m_Dataframe.getColIndex(colNames[i]);
colNameIndexes[i] = colNameIndex;
}
for (int i = 0; i < m_Dataframe.getRowCounts(); i++) {
String[] currColValues = m_Dataframe.getRow(i);
boolean bIsEquals = true;
for (int j = 0; j < colNames.length; j++) {
String aColValue = colValues[j];
String currColValue = currColValues[colNameIndexes[j]];
if (!aColValue.equalsIgnoreCase(currColValue)) {
bIsEquals = false;
break;
}
}
if (bIsEquals) {
return i;
}
}
return -1;
}
public class ExcelReadX {
public DataframeInDC converter(String xslsFilePath, String sheetName)
throws Exception {
Workbook wb = null;
FileInputStream fIns = null;
try {
fIns = new FileInputStream(new File(xslsFilePath));
wb = WorkbookFactory.create(fIns);
Sheet sheet = wb.getSheet(sheetName);
// wb = new XSSFWorkbook(fIns);
// sheetName=基因分析
// XSSFSheet sheet = wb.getSheet(sheetName);
if (sheet == null) {
throw new Exception("Excel缺少sheet页[" + sheetName + "]");
}
// int nFirstRowNum = sheet.getFirstRowNum();
int nLastRowNum = sheet.getLastRowNum();
Row row1 = sheet.getRow(0);
int nCellNum1 = row1.getFirstCellNum();
int nCellNum2 = row1.getLastCellNum();
DataframeInDC dataframe = new DataframeInDC();
// 根据Excel的前1行,构造表头
Map<Integer, String> hmIndexColNames = new HashMap<Integer, String>();
List<String> colNames = new ArrayList<String>();
for (int colIndex = nCellNum1; colIndex <= nCellNum2; colIndex++) {
Cell cell1 = row1.getCell(colIndex);
// XSSFCell cell2 = row2.getCell(colIndex);
String cell1Value = ExcelUtils.getExcelCellValue(cell1);
if (cell1Value.equals(""))
cell1Value = "Column" + colIndex;
// String cell2Value = ExcelUtils.getExcelCellValue(cell2);
String colName = cell1Value;
/*
* if(!cell2Value.equals("")){ colName =
* cell1Value+"__"+cell2Value; }
*/colNames.add(colName);
hmIndexColNames.put(colIndex, colName);
}
dataframe.setColNames(colNames);
// 根据Excel的后面行,构造内容
for (int colIndex = nCellNum1; colIndex <= nCellNum2; colIndex++) {
ArrayList<Object> o = new ArrayList<Object>();
String colName = hmIndexColNames.get(colIndex);
for (int rowIndex = 1; rowIndex <= nLastRowNum; rowIndex++) {
Row row = sheet.getRow(rowIndex);
Cell cell = row.getCell(colIndex);
String cellValue = ExcelUtils.getExcelCellValue(cell);
o.add(cellValue);
}
dataframe.updateColValue(colName, o);
}
return dataframe;
} catch (Exception ex) {
ex.printStackTrace();
throw ex;
} finally {
if (fIns != null) {
try {
fIns.close();
fIns = null;
} catch (Exception ex) {
}
}
}
}
public class ExcelUtils {
public static String getExcelCellValue(XSSFCell cell) {
String cellValue = "";
if (cell != null) {
switch (cell.getCellType()) {
case HSSFCell.CELL_TYPE_NUMERIC: // 数字
// 数字类型处理
BigDecimal bd = new BigDecimal(cell.getNumericCellValue());
cellValue = bd.setScale(3, BigDecimal.ROUND_UP).toString();
break;
case HSSFCell.CELL_TYPE_STRING: // 字符串
cellValue = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_BOOLEAN: // Boolean
cellValue = String.valueOf(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_FORMULA: // 公式
cellValue = cell.getCellFormula();
break;
case HSSFCell.CELL_TYPE_BLANK: // 空值
cellValue = "";
break;
case HSSFCell.CELL_TYPE_ERROR: // 故障
cellValue = "";
break;
default:
cellValue = "";
break;
}
}
return cellValue.trim();
}
public static String getExcelCellValue(Cell cell) {
String cellValue = "";
if (cell != null) {
switch (cell.getCellType()) {
case HSSFCell.CELL_TYPE_NUMERIC: // 数字
String result = "";
if (HSSFDateUtil.isCellDateFormatted(cell)) {// 处理日期格式、时间格式
SimpleDateFormat sdf = null;
if (cell.getCellStyle().getDataFormat() == HSSFDataFormat
.getBuiltinFormat("h:mm")) {
sdf = new SimpleDateFormat("HH:mm");
} else {// 日期
sdf = new SimpleDateFormat("yyyy-MM-dd");
}
Date date = cell.getDateCellValue();
result = sdf.format(date);
} else if (cell.getCellStyle().getDataFormat() == 58) {
// 处理自定义日期格式:m月d日(通过判断单元格的格式id解决,id的值是58)
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
double value = cell.getNumericCellValue();
Date date = org.apache.poi.ss.usermodel.DateUtil
.getJavaDate(value);
result = sdf.format(date);
} else {
double value = cell.getNumericCellValue();
CellStyle style = cell.getCellStyle();
DecimalFormat format = new DecimalFormat();
String temp = style.getDataFormatString();
// 单元格设置成常规
if (temp.equals("General")) {
format.applyPattern("#");
}
result = format.format(value);
}
return result;
case HSSFCell.CELL_TYPE_STRING: // 字符串
cellValue = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_BOOLEAN: // Boolean
cellValue = String.valueOf(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_FORMULA: // 公式
cellValue = cell.getCellFormula();
break;
case HSSFCell.CELL_TYPE_BLANK: // 空值
cellValue = "";
break;
case HSSFCell.CELL_TYPE_ERROR: // 故障
cellValue = "";
break;
default:
cellValue = "";
break;
}
}
return cellValue.trim();
}
public static DataframeInDC readStandardXlsxFile(String xlsxFilePath,
String sheetName) throws Exception {
XSSFWorkbook wb = null;
FileInputStream fIns = null;
try {
fIns = new FileInputStream(new File(xlsxFilePath));
wb = new XSSFWorkbook(fIns);
// sheetName=基因分析
XSSFSheet sheet = wb.getSheet(sheetName);
if (sheet == null) {
throw new Exception("Excel缺少sheet页[" + sheetName + "]");
}
return getDataframeFromXSSFSheet(sheet);
} catch (Exception ex) {
ex.printStackTrace();
} finally {
CloseableUtils.closeQuietly(fIns);
}
return null;
}
public static DataframeInDC readStandardXlsxFile(String xlsxFilePath,
int sheetId) throws Exception {
XSSFWorkbook wb = null;
FileInputStream fIns = null;
try {
fIns = new FileInputStream(new File(xlsxFilePath));
wb = new XSSFWorkbook(fIns);
// sheetName=基因分析
XSSFSheet sheet = wb.getSheetAt(sheetId);
if (sheet == null) {
throw new Exception("Excel缺少sheet页[" + sheetId + "]");
}
return getDataframeFromXSSFSheet(sheet);
} catch (Exception ex) {
ex.printStackTrace();
} finally {
CloseableUtils.closeQuietly(fIns);
}
return null;
}
private static DataframeInDC getDataframeFromXSSFSheet(XSSFSheet sheet)
throws Exception {
System.out.println("sheet.getName():" + sheet.getSheetName());
int nLastRowNum = sheet.getLastRowNum();
// 读取数据,结合背景颜色进行过滤
DataframeInDC dataframe = new DataframeInDC();
for (int i = 0; i < nLastRowNum; i++) {
if (i == 0) {
// 标题栏
XSSFRow row = sheet.getRow(i);
List<String> colNames = new ArrayList<String>();
for (int j = 0; j < row.getLastCellNum(); j++) {
XSSFCell cell = row.getCell(j);
String cellValue = ExcelUtils.getExcelCellValue(cell);
if (cellValue == null || cellValue.trim().equals("")) {
cellValue = "Column" + j;
} else {
cellValue = cellValue.trim();
}
colNames.add(cellValue);
}
dataframe.setColNames(colNames);
dataframe.initBlankCols();
continue;
}
XSSFRow row = sheet.getRow(i);
int nColCounts = dataframe.getColCounts();
String[] rowValues = new String[nColCounts];
for (int j = 0; j < nColCounts; j++) {
if (j >= row.getLastCellNum()) {
rowValues[j] = "";
} else {
XSSFCell cell = row.getCell(j);
String cellValue = ExcelUtils.getExcelCellValue(cell);
rowValues[j] = cellValue.trim();
}
}
dataframe.addRow(rowValues);
}
return dataframe;
}
}