1.打开Kettle工具,创建转换
2.配置自定义常量数据
3.配置HTTP client
4.配置Java代码
/**
 * Default pass-through row handler of the "User Defined Java Class" step.
 *
 * Reads one row via getRow(), resizes it to the output row layout and
 * forwards it unchanged with putRow().
 *
 * @param smi step metadata supplied by the caller (not used here)
 * @param sdi step data supplied by the caller (not used here)
 * @return true after a row was forwarded; false once getRow() returned null
 *         and setOutputDone() was called
 * @throws KettleException propagated from the row-handling helpers
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    // One-time initialisation hook: runs only on the first invocation.
    if (first) {
        first = false;

        /* TODO: Your code here. (Using info fields)

        FieldHelper infoField = get(Fields.Info, "info_field_name");
        RowSet infoStream = findInfoRowSet("info_stream_tag");

        Object[] infoRow = null;
        int infoRowCount = 0;

        // Drain every row of the info step before the first getRow() call:
        // getRow() hands back the first row of any input rowset, and because
        // info and input steps have different row metadata that can fail.
        while ((infoRow = getRowFrom(infoStream)) != null) {
            // do something with info data
            infoRowCount++;
        }
        */
    }

    final Object[] inputRow = getRow();
    if (inputRow != null) {
        // createOutputRow() guarantees the Object[] is large enough for any
        // additional fields this step declares.
        final Object[] outputRow = createOutputRow(inputRow, data.outputRowMeta.size());

        /* TODO: Your code here. (See Sample)

        // Read a value from an input field
        String foobar = get(Fields.In, "a_fieldname").getString(outputRow);
        foobar += "bar";

        // Store a value in a new output field
        get(Fields.Out, "output_fieldname").setValue(outputRow, foobar);
        */

        // Hand the row over to the next step.
        putRow(data.outputRowMeta, outputRow);
        return true;
    }

    // getRow() returned null: signal completion and stop.
    setOutputDone();
    return false;
}
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.mysql.jdbc.Connection;
import com.mysql.jdbc.PreparedStatement;
// Name of the input field holding the HTML text; read from the step parameter "result".
private String result;
// Text of the most recently extracted link (anchor markup stripped).
private String contents;
// java.sql types are written fully qualified because the file also imports
// com.mysql.jdbc.Connection / PreparedStatement under the same simple names.
// The connection and statement are opened once on first use and reused for every insert.
private java.sql.Connection connection = null;
private java.sql.PreparedStatement insertStatement = null;
// Compiled once on the first row instead of once per row / per match.
private Pattern anchorPattern;
private Pattern tagPattern;

/**
 * Extracts the text of every {@code <a ... href ...>...</a>} element found in the
 * input field named by the "result" parameter. For each match it writes the
 * tag-stripped text to the output field "contents", inserts it into the MySQL
 * table {@code html}, and emits one output row.
 *
 * Input rows without any match (or with a null field value) produce no output row.
 *
 * @param smi step metadata supplied by the caller (not used here)
 * @param sdi step data supplied by the caller (not used here)
 * @return true after an input row was processed; false once getRow() returned null
 * @throws KettleException if a row helper fails or the database connection cannot be opened
 * @throws SQLException if preparing or executing the insert fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException,SQLException{
    if (first) {
        result = getParameter("result");
        // NOTE(review): "[^\\\\s>]" compiles to a class that excludes backslash, 's' and '>',
        // which looks like a mistyped "[^\\s>]". The pattern is kept as-is so matching
        // behaviour does not change.
        anchorPattern = Pattern.compile("<a[^>]*href(\\\"([^\\\"]*)\\\"|\\'([^\\']*)\\'|([^\\\\s>]*))[^>]*>(.*?)</a>");
        tagPattern = Pattern.compile("<[^>]*>");
        first = false;
    }

    Object[] r = getRow();
    if (r == null) {
        // No more input: release the database resources before finishing.
        closeConnection();
        setOutputDone();
        return false;
    }

    String html = get(Fields.In, result).getString(r);
    if (html == null) {
        // Nothing to scan; Pattern.matcher(null) would throw a NullPointerException.
        return true;
    }

    boolean succeeded = false;
    try {
        Matcher m = anchorPattern.matcher(html);
        while (m.find()) {
            contents = tagPattern.matcher(m.group()).replaceAll("");

            // Request a separate output row per match so rows already handed to putRow()
            // are not overwritten by later matches (assumes createOutputRow() returns a
            // new array on each call - TODO confirm against the Kettle version in use).
            Object[] outputRow = createOutputRow(r, data.outputRowMeta.size());
            get(Fields.Out, "contents").setValue(outputRow, contents);

            insertContents(contents);
            putRow(data.outputRowMeta, outputRow);
        }
        succeeded = true;
    } finally {
        if (!succeeded) {
            // Do not leak the connection when the row fails part-way through.
            closeConnection();
        }
    }
    return true;
}

// Inserts one extracted text into the html table, opening the connection on first use.
private void insertContents(String text) throws KettleException, SQLException {
    if (connection == null) {
        openConnection();
    }
    insertStatement.setString(1, text);
    insertStatement.executeUpdate();
}

// Loads the MySQL driver, opens the connection and prepares the reusable insert statement.
private void openConnection() throws KettleException, SQLException {
    // NOTE(review): credentials are hard-coded; consider supplying them as step parameters.
    String url = "jdbc:mysql://localhost:3306/extract";
    String userName = "root";
    String userPwd = "123456";
    try {
        // Load the driver class
        Class.forName("com.mysql.jdbc.Driver");
        // Obtain the connection
        connection = DriverManager.getConnection(url, userName, userPwd);
    } catch (Exception e) {
        // Fail here with the cause attached instead of continuing with a null connection.
        throw new KettleException("Unable to open MySQL connection to " + url, e);
    }
    // SQL statement to execute
    insertStatement = connection.prepareStatement("insert into html (contents) values (?);");
}

// Closes the statement and connection if they are open; safe to call repeatedly.
private void closeConnection() {
    if (insertStatement != null) {
        try {
            insertStatement.close();
        } catch (SQLException ignored) {
            // Best-effort cleanup: a failure to close must not hide the original outcome.
        }
        insertStatement = null;
    }
    if (connection != null) {
        try {
            connection.close();
        } catch (SQLException ignored) {
            // Best-effort cleanup: a failure to close must not hide the original outcome.
        }
        connection = null;
    }
}
5.运行转换
6.查看数据