1. 客户端代码编写:
/**
 * Client-side driver that configures and runs a Sqoop import from the Oracle
 * table STUDENT2 into the HBase table "xun_sq" (column family "f"), using a
 * custom put transformer to generate the HBase row key.
 */
public class SqoopTest {

    /**
     * Builds the import options, runs the import, and exits with the import
     * tool's return code when it is non-zero.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SqoopOptions options = new SqoopOptions();

        // Source database connection.
        // NOTE(review): credentials are hard-coded here; consider loading them
        // from configuration instead of source code.
        options.setConnectString("jdbc:oracle:thin:@192.168.176.11:1521:orcl");
        options.setUsername("edata");
        options.setPassword("edata");

        // What to import.
        // NOTE(review): both a table name and a free-form query are configured;
        // confirm which of the two the import tool actually honours.
        options.setTableName("STUDENT2");
        // This where clause works when importing a whole table, i.e. when
        // setTableName() is used.
        options.setWhereClause("id > 0");
        options.setSqlQuery("SELECT * FROM student2 WHERE $CONDITIONS ");
        options.setSplitByCol("id");
        String[] columns = {"id", "name", "age"};
        options.setColumns(columns);

        // Direct mode must be off when importing data into HBase.
        options.setDirectMode(false);
        options.setNumMappers(1); // Default value is 4

        // HBase options
        options.setHBaseTable("xun_sq");
        options.setHBaseColFamily("f");
        options.setCreateHBaseTable(true); // Create HBase table, if it does not exist
        // Comma-separated list: the custom transformer concatenates these columns
        // to form the row key.
        options.setHBaseRowKeyColumn("id,name");

        Configuration config = new Configuration();
        // Registers the custom row-key generation rule. If this setting has no
        // effect, configure the property in Sqoop's conf.xml and restart Sqoop.
        config.set("sqoop.hbase.insert.put.transformer.class", "com.edata.sqoop.DdtMapInfoTransFormat");
        config.setBoolean("sqoop.hbase.add.row.key", true);
        options.setConf(config);

        // Propagate a failed import to the caller through the process exit status.
        int ret = new ImportTool().run(options);
        if (ret != 0) {
            System.exit(ret);
        }
    }
}
2. 自定义 rowkey 生成规则的代码(如需自定义):
/**
 * Put transformer that builds the HBase row key from one or more record
 * columns.
 *
 * <p>The configured row-key column is treated as a comma-separated list of
 * column names. The values of those columns are concatenated in order (with no
 * separator) and the literal suffix {@code ":uuid"} is appended. Every non-null
 * field of the record, including the row-key columns themselves, is written as
 * a cell in the configured column family using the field's {@code toString()}
 * value.
 */
public class DdtMapInfoTransFormat extends PutTransformer {
    public static final Log LOG = LogFactory.getLog(DdtMapInfoTransFormat.class.getName());

    /** Literal suffix appended to every generated row key. */
    private static final String ROW_KEY_SUFFIX = ":uuid";

    /** Cache of field name to its serialized bytes, to avoid re-encoding per record. */
    private final Map<String, byte[]> serializedFieldNames;

    public DdtMapInfoTransFormat() {
        serializedFieldNames = new TreeMap<String, byte[]>();
    }

    /**
     * Return the serialized bytes for a field name, using the cache if it's
     * already in there.
     *
     * @param fieldName the column name to serialize
     * @return the bytes produced by {@code Bytes.toBytes(fieldName)}
     */
    private byte[] getFieldNameBytes(String fieldName) {
        byte[] cachedName = serializedFieldNames.get(fieldName);
        if (null != cachedName) {
            // Cache hit. We're done.
            return cachedName;
        }
        // Do the serialization and memoize the result.
        byte[] nameBytes = Bytes.toBytes(fieldName);
        serializedFieldNames.put(fieldName, nameBytes);
        return nameBytes;
    }

    /**
     * Builds the single {@code Put} for one record.
     *
     * @param fields the record, as a map from column name to value
     * @return a one-element list holding the {@code Put}, or {@code null} when
     *     any row-key column has no value in {@code fields}; per the original
     *     intent, such a row is not inserted
     * @throws IOException declared by the overridden method
     */
    @Override
    public List<Put> getPutCommand(Map<String, Object> fields) throws IOException {
        String rowKeyCol = getRowKeyColumn();
        String colFamily = getColumnFamily();

        // The row key may be made up of several comma-separated columns.
        String[] rowKeyColumns = rowKeyCol.split(",");
        StringBuilder rowKey = new StringBuilder();
        for (String column : rowKeyColumns) {
            Object part = fields.get(column);
            if (null == part) {
                // A null component would otherwise be concatenated as the text
                // "null" and yield a bogus key, so the row is rejected instead.
                LOG.warn("Could not insert row with null value for row-key column: " + column);
                return null;
            }
            rowKey.append(part);
        }
        rowKey.append(ROW_KEY_SUFFIX);

        byte[] colFamilyBytes = Bytes.toBytes(colFamily);
        Put put = new Put(Bytes.toBytes(rowKey.toString()));
        for (Map.Entry<String, Object> fieldEntry : fields.entrySet()) {
            // Row-key columns are not filtered out here: every non-null field
            // is stored as a cell.
            Object val = fieldEntry.getValue();
            if (null != val) {
                put.add(colFamilyBytes, getFieldNameBytes(fieldEntry.getKey()), Bytes.toBytes(val.toString()));
            }
        }
        return Collections.singletonList(put);
    }
}