为了建立一个英语词汇音标库,需要从网上搜索词汇音标。由于有10万多个词汇需要处理,所以做了这个单词音标爬虫,用来爬取必应网络词典网页上的单词音标。
package bingword;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javafx.application.Application;
import javafx.beans.value.ChangeListener;
import javafx.beans.value.ObservableValue;
import javafx.concurrent.Worker;
import javafx.event.ActionEvent;
import javafx.event.EventHandler;
import javafx.geometry.Insets;
import javafx.scene.Scene;
import javafx.scene.control.Button;
import javafx.scene.control.Label;
import javafx.scene.control.TextArea;
import javafx.scene.control.TextField;
import javafx.scene.control.Tooltip;
import javafx.scene.layout.GridPane;
import javafx.scene.text.Font;
import javafx.scene.web.WebEngine;
import javafx.scene.web.WebView;
import javafx.stage.FileChooser;
import javafx.stage.Stage;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
public class BingWord extends Application {
// ---- UI controls ----
// Layout container. NOTE(review): presumably the root pane that hosts the controls below; confirm where the scene is built.
private final GridPane grid = new GridPane();
// Text field holding the row number to start from; start() presets it to "1" and the btn handler parses it as an int.
private final TextField inputText = new TextField();
// Button that starts the phonetic lookup from the row entered in inputText.
private final Button btn = new Button();
// NOTE(review): the roles of btn1 and btn2 are not visible in this part of the file; confirm against their setup code.
private final Button btn1 = new Button();
private final Button btn2 = new Button();
// NOTE(review): presumably sets the pause flag below; confirm against its handler.
private final Button btnPause = new Button();
// Embedded web view. NOTE(review): presumably used to load the dictionary pages; confirm in search().
private final WebView browser = new WebView();
// Engine of the web view above; must be declared after browser because field initializers run in declaration order.
private final WebEngine webEngine = browser.getEngine();
// NOTE(review): textArea and info are not used in the visible part of the file; presumably output/status widgets.
private final TextArea textArea = new TextArea();
private final Label info = new Label();
// ---- Crawl state ----
// Row position to start from; overwritten with the value typed into inputText when btn is pressed.
private int recordPosition = 1;
// NOTE(review): presumably the word currently being looked up; confirm in search().
private String currentWord = "";
// ---- Database handles (opened in start()) ----
// JDBC connection to the SQLite file dictionary.db.
private Connection connection = null;
// Statement created from the connection in start() with a query timeout of 30; reused for the queries visible here.
private Statement statement;
// Result set of the most recently executed query (e.g. the row count of wordlist).
private ResultSet rs;
// Number of rows in the wordlist table, refreshed from SELECT count(*) each time btn is pressed.
private int maxNumber = 1;
// Pause flag; reset to false whenever btn is pressed. NOTE(review): presumably checked by the search loop;
// a primitive boolean would suffice unless null is assigned elsewhere.
private Boolean pause = false;
@Override
public void start(Stage primaryStage) throws SQLException {
//程序启动后就连接好SQLite数据库,或者新建一个dictionary.db数据库文件
connection = DriverManager.getConnection("jdbc:sqlite:dictionary.db");
statement = connection.createStatement();
statement.setQueryTimeout(30);
statement.executeUpdate("CREATE TABLE IF NOT EXISTS wordlist (id INTEGER PRIMARY KEY AUTOINCREMENT, word TEXT, pronunciation1 TEXT, pronunciation2 TEXT)");
System.out.println("dictionary.db数据库已经连接");
//文本框供手动设置开始行位置,默认值为第一行开始
inputText.setText("1");
//查找必应词典音标的启动按钮
Tooltip tooltip=new Tooltip();
tooltip.setText("请在右侧输入框内填入开始查找的行号");
tooltip.setFont(new Font("Arial", 20));
btn.setTooltip(tooltip);
btn.setText("开始查找音标行");
btn.setOnAction(new EventHandler<ActionEvent>() {
@Override
public void handle(ActionEvent event) {
//重启
pause = false;
//获取文本框中设定的起始行位置
recordPosition = Integer.parseInt(inputText.getText());
try {
//查询记录总条数
rs = statement.executeQuery("SELECT count(*) FROM wordlist;");
while (rs.next()) {
maxNumber = rs.getInt(1);
System.out.println("数据库中总单词量:" + maxNumber);
}
//单词音标搜索程序
search();
} catch (SQLException ex) {
Logger.getLogger(BingWord.class.getName()).log(Level.SEVERE, null, ex);
}