kettle通过java步骤获取汉字首拼
用途描述
一组数据,需要获取汉字首拼后,输出;
实现效果
添加jar包
pinyin4j-2.5.0.jar
自定义常量数据
Java代码
完整代码:
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
// Name of the input field whose text is converted; assigned on the first processRow call.
String nameField;
// Name of the output field that receives the pinyin initials; assigned on the first processRow call.
String pyField;
/**
 * Handles one row per call: reads the text of the input field, derives its
 * pinyin initials with toFirstChar, stores them in the output field and
 * forwards the row to the default output hop.
 *
 * @return false once the input is exhausted (after signalling output done), true otherwise
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException
{
    // Resolve the field names a single time; they are hard-coded here
    // (getParameter("name") / getParameter("py") would be the configurable variant).
    if (first) {
        first = false;
        nameField = "name";
        pyField = "py";
    }

    // Fetch the next row from the default input hop.
    Object[] inputRow = getRow();

    // A null row means there is nothing left to process.
    if (inputRow == null) {
        setOutputDone();
        return false;
    }

    // Resize the row so it can hold every field of the output row metadata,
    // including the field written below.
    Object[] resultRow = createOutputRow(inputRow, data.outputRowMeta.size());

    // Read the source text and compute its pinyin initials.
    String sourceText = get(Fields.In, nameField).getString(inputRow);
    String initials = toFirstChar(sourceText);

    // Store the result and hand the row to the default output hop.
    get(Fields.Out, pyField).setValue(resultRow, initials);
    putRow(data.outputRowMeta, resultRow);
    return true;
}
/**
 * Returns the pinyin initials of a string.
 *
 * ASCII spaces are removed first. Every remaining character with a code of
 * 128 or below is copied unchanged; every character above 128 is replaced by
 * the first letter of the first lowercase, tone-less pinyin reading that
 * pinyin4j returns for it, and is dropped when no reading is returned.
 *
 * @param chinese the text to convert; may be null
 * @return the initials string, or "" when the input is null or empty
 */
public static String toFirstChar(String chinese){
    if (chinese == null || chinese.isEmpty()) {
        return "";
    }

    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    String compact = chinese.replace(" ", "");
    StringBuilder initials = new StringBuilder();
    for (int idx = 0; idx < compact.length(); idx++) {
        char ch = compact.charAt(idx);
        if (ch <= 128) {
            // Low code points are kept as they are.
            initials.append(ch);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, format);
            // Only the first reading is used; characters without one are skipped.
            if (readings != null && readings.length >= 1) {
                initials.append(readings[0].charAt(0));
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return initials.toString();
}
/**
 * Converts a string to lowercase, tone-less pinyin.
 *
 * Characters with a code of 128 or below are copied unchanged. Every
 * character above 128 is replaced by the first pinyin reading pinyin4j
 * returns for it. Characters above 128 for which the library returns no
 * reading (for example full-width punctuation) are skipped, the same way
 * toFirstChar treats them, instead of raising a NullPointerException.
 *
 * @param chinese the text to convert; may be null
 * @return the pinyin string, or "" when the input is null or empty
 */
public static String toPinyin(String chinese){
    // Same null/empty contract as toFirstChar.
    if (chinese == null || chinese.isEmpty()) {
        return "";
    }

    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    // StringBuilder avoids re-copying the whole result on every appended character.
    StringBuilder pinyinStr = new StringBuilder();
    for (int i = 0; i < chinese.length(); i++) {
        char current = chinese.charAt(i);
        if (current > 128) {
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(current, defaultFormat);
                // The lookup yields null for characters that have no pinyin; skip those.
                if (readings == null || readings.length < 1) {
                    continue;
                }
                pinyinStr.append(readings[0]);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Best-effort conversion: report the problem and carry on with the next character.
                e.printStackTrace();
            }
        } else {
            pinyinStr.append(current);
        }
    }
    return pinyinStr.toString();
}
–END–