package com.lenovo.lhp.food;
import com.lenovo.lhp.food.entity.Food;
import com.lenovo.lhp.food.entity.FoodEnergyComposition;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * Crawler that walks a food list page with Selenium, opens every food's detail page, reads the
 * first {@code details_table} (energy and composition values) and upserts the result into the
 * MySQL tables {@code com_food} and {@code com_food_energy_composition}.
 *
 * <p>Connection settings default to the values that were historically hard-coded here and can be
 * overridden with the system properties {@code food.db.url}, {@code food.db.username} and
 * {@code food.db.password}. The chromedriver location can be overridden with the standard
 * {@code webdriver.chrome.driver} system property.
 */
public class FoodFetch {

    /** JDBC driver class that is loaded before a connection is requested. */
    private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";

    /** JDBC URL of the target database. */
    private static final String JDBC_URL =
            System.getProperty("food.db.url", "jdbc:mysql://10.121.121.216:30040/lhp_defaults");

    private static final String JDBC_USERNAME = System.getProperty("food.db.username", "root");

    // NOTE(review): the default password is kept only so existing launches keep working;
    // prefer passing -Dfood.db.password=... and removing the embedded default.
    private static final String JDBC_PASSWORD = System.getProperty("food.db.password", "passw0rd");

    /** Chromedriver path used when {@code webdriver.chrome.driver} is not already set. */
    private static final String DEFAULT_CHROME_DRIVER_PATH =
            "D:\\software\\chromedriver_win32\\chromedriver.exe";

    /** Entry page of the food list. */
    private static final String LIST_URL = "http://yycx.yybq.net/searchlist__1.htm";

    /** Number of list entries processed per batch. */
    private static final int BATCH_SIZE = 10;

    /** Consecutive empty batches after which the list is considered exhausted. */
    private static final int MAX_EMPTY_BATCHES = 3;

    /** Value stored when a table cell is missing or cannot be parsed as a number. */
    private static final BigDecimal ZERO = new BigDecimal("0.0");

    /**
     * Opens a JDBC connection to the configured database.
     *
     * @return an open connection, or {@code null} when the driver class cannot be loaded or the
     *         connection attempt fails (the failure is printed to the console)
     */
    public static Connection open() {
        try {
            Class.forName(JDBC_DRIVER);
            return DriverManager.getConnection(JDBC_URL, JDBC_USERNAME, JDBC_PASSWORD);
        } catch (ClassNotFoundException | SQLException e) {
            System.out.println("数据库连接失败!");
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Closes the given connection; a {@code null} argument is ignored.
     *
     * @param conn connection to close, may be {@code null}
     */
    public static void close(Connection conn) {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts a row into {@code com_food}, or overwrites the existing row on a duplicate key.
     *
     * @param id                      primary key of the food row
     * @param chineseName             Chinese name of the food
     * @param primaryClassification   first-level category
     * @param secondaryClassification second-level category, may be {@code null}
     * @param conn                    open connection used for the statement
     * @return the row count reported by {@code executeUpdate}, or {@code 0} when an
     *         {@link SQLException} occurred (the exception is printed)
     */
    static int insertFood(Long id, String chineseName, String primaryClassification,
            String secondaryClassification, Connection conn) {
        String sql = "insert into com_food (id,chinese_name,primary_classification,secondary_classification)\n" +
                " values (?,?,?,?)\n" +
                " on duplicate key update\n" +
                " id = ?,\n" +
                " chinese_name = ?,\n" +
                " primary_classification = ?,\n" +
                " secondary_classification = ?";
        // try-with-resources releases the statement even when executeUpdate fails.
        try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
            // The same four values feed the insert part (1-4) and the update part (5-8).
            for (int offset = 0; offset < 8; offset += 4) {
                pstmt.setLong(offset + 1, id);
                pstmt.setString(offset + 2, chineseName);
                pstmt.setString(offset + 3, primaryClassification);
                pstmt.setString(offset + 4, secondaryClassification);
            }
            return pstmt.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
            return 0;
        }
    }

    /**
     * Inserts a row into {@code com_food_energy_composition}, or overwrites the existing row on a
     * duplicate key.
     *
     * @param foodEnergyComposition values to store
     * @param conn                  open connection used for the statement
     * @return the row count reported by {@code executeUpdate}, or {@code 0} when an
     *         {@link SQLException} occurred (the exception is printed)
     */
    static int insertFoodEnergyComposition(FoodEnergyComposition foodEnergyComposition, Connection conn) {
        String sql = "insert into com_food_energy_composition (id,food_id,edible,edible_unit,moisture_content,moisture_content_unit,energy,\n" +
                " energy_unit,protein,protein_unit,fat,fat_unit,cholesterol,cholesterol_unit,ash_content,ash_content_unit,\n" +
                " carbohydrate,carbohydrate_unit,dietary_fiber,dietary_fiber_unit)\n" +
                " values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)\n" +
                " on duplicate key update\n" +
                " id = ?,\n" +
                " food_id = ?,\n" +
                " edible = ?,\n" +
                " edible_unit = ?,\n" +
                " moisture_content = ?,\n" +
                " moisture_content_unit = ?,\n" +
                " energy = ?,\n" +
                " energy_unit = ?,\n" +
                " protein = ?,\n" +
                " protein_unit = ?,\n" +
                " fat = ?,\n" +
                " fat_unit = ?,\n" +
                " cholesterol =?,\n" +
                " cholesterol_unit = ?,\n" +
                " ash_content = ?,\n" +
                " ash_content_unit = ?,\n" +
                " carbohydrate = ?,\n" +
                " carbohydrate_unit = ?,\n" +
                " dietary_fiber = ?,\n" +
                " dietary_fiber_unit = ?";
        try (PreparedStatement pstmt = conn.prepareStatement(sql)) {
            bindComposition(pstmt, 0, foodEnergyComposition);   // placeholders 1-20: insert values
            bindComposition(pstmt, 20, foodEnergyComposition);  // placeholders 21-40: update values
            return pstmt.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
            return 0;
        }
    }

    /** Binds the twenty composition columns to placeholders {@code offset + 1 .. offset + 20}. */
    private static void bindComposition(PreparedStatement pstmt, int offset, FoodEnergyComposition c)
            throws SQLException {
        pstmt.setLong(offset + 1, c.getId());
        pstmt.setLong(offset + 2, c.getFoodId());
        pstmt.setBigDecimal(offset + 3, c.getEdible());
        pstmt.setString(offset + 4, c.getEdibleUnit());
        pstmt.setBigDecimal(offset + 5, c.getMoistureContent());
        pstmt.setString(offset + 6, c.getMoistureContentUnit());
        pstmt.setBigDecimal(offset + 7, c.getEnergy());
        pstmt.setString(offset + 8, c.getEnergyUnit());
        pstmt.setBigDecimal(offset + 9, c.getProtein());
        pstmt.setString(offset + 10, c.getProteinUnit());
        pstmt.setBigDecimal(offset + 11, c.getFat());
        pstmt.setString(offset + 12, c.getFatUnit());
        pstmt.setBigDecimal(offset + 13, c.getCholesterol());
        pstmt.setString(offset + 14, c.getCholesterolUnit());
        pstmt.setBigDecimal(offset + 15, c.getAshContent());
        pstmt.setString(offset + 16, c.getAshContentUnit());
        pstmt.setBigDecimal(offset + 17, c.getCarbohydrate());
        pstmt.setString(offset + 18, c.getCarbohydrateUnit());
        pstmt.setBigDecimal(offset + 19, c.getDietaryFiber());
        pstmt.setString(offset + 20, c.getDietaryFiberUnit());
    }

    /**
     * Runs the crawl: opens the list page, processes the entries in batches of
     * {@value #BATCH_SIZE}, scrolls down to make the page load more entries, and stops once
     * {@value #MAX_EMPTY_BATCHES} consecutive batches come back empty. The browser and the
     * database connection are released when the crawl ends, normally or with an exception.
     *
     * @param args unused
     * @throws Exception if the crawl is interrupted or Selenium reports an error
     */
    public static void main(String[] args) throws Exception {
        // Respect an externally supplied chromedriver location; fall back to the historic path.
        if (System.getProperty("webdriver.chrome.driver") == null) {
            System.setProperty("webdriver.chrome.driver", DEFAULT_CHROME_DRIVER_PATH);
        }
        Connection conn = FoodFetch.open();
        if (conn == null) {
            // open() already reported the failure; nothing could be stored, so do not crawl.
            return;
        }
        WebDriver driver = null;
        try {
            driver = new ChromeDriver();
            JavascriptExecutor executor = (JavascriptExecutor) driver;
            driver.get(LIST_URL);
            driver.manage().window().maximize();
            Thread.sleep(3000);
            // Click the search button so the result list is shown.
            driver.findElement(By.xpath("/html/body/div[2]/div[1]/button")).click();
            Thread.sleep(3000);

            int first = 1;          // 1-based position of the first entry of the current batch
            int emptyBatches = 0;   // consecutive batches without any entry
            while (emptyBatches < MAX_EMPTY_BATCHES) {
                // Positions first .. first+9, i.e. 1-10, 11-20, ...
                List<WebElement> entries = driver.findElements(By.xpath(
                        "//div[@class='public_margin_top ysq_div_list']/div[position()>=" + first
                                + " and position()<=" + (first + BATCH_SIZE - 1) + "]/a"));
                Thread.sleep(3000);
                if (entries.isEmpty()) {
                    // Possibly just not loaded yet: scroll again and retry the same range.
                    emptyBatches++;
                } else {
                    emptyBatches = 0;
                    for (WebElement entry : entries) {
                        scrapeFood(driver, executor, entry, conn);
                    }
                    first += BATCH_SIZE;
                }
                // Scroll down so the page appends the next entries.
                executor.executeScript("window.scrollBy(0,5000)");
                Thread.sleep(4000);
            }
        } finally {
            if (driver != null) {
                driver.quit();
            }
            close(conn);
        }
    }

    /**
     * Processes one list entry: opens its detail page, reads the composition table, navigates
     * back, then reads name and classification from the list entry and stores both records.
     * The composition row is only written when the food row was written.
     */
    private static void scrapeFood(WebDriver driver, JavascriptExecutor executor, WebElement entry,
            Connection conn) throws InterruptedException {
        Food food = new Food();
        food.setId(Long.valueOf(getRandomUUID()));
        FoodEnergyComposition composition = new FoodEnergyComposition();
        composition.setId(Long.valueOf(getRandomUUID()));
        composition.setFoodId(food.getId());

        // Enter the detail page.
        executor.executeScript("arguments[0].click();", entry);
        Thread.sleep(3000);
        readComposition(driver, composition);
        // Go back to the list page.
        driver.navigate().back();
        Thread.sleep(10000);

        // NOTE(review): the entry is reused after navigating away and back; this relies on the
        // list DOM surviving the round trip - confirm against the live site.
        for (WebElement child : entry.findElements(By.xpath("./*"))) {
            if (!"weui_media_bd".equals(child.getAttribute("class"))) {
                continue;
            }
            for (WebElement field : child.findElements(By.xpath("./*"))) {
                String cssClass = field.getAttribute("class");
                if ("list_title".equals(cssClass)) {
                    String name = field.getText();
                    food.setChineseName(name);
                    System.out.println(name);
                } else if ("list_type".equals(cssClass)) {
                    String type = field.getText();
                    // "primary-secondary"; anything other than exactly two parts has no secondary.
                    String[] classify = type.split("-");
                    food.setPrimaryClassification(classify[0]);
                    food.setSecondaryClassification(classify.length == 2 ? classify[1] : null);
                    // The name is expected to have been set by a preceding list_title element.
                    if (insertFood(food.getId(), food.getChineseName(), food.getPrimaryClassification(),
                            food.getSecondaryClassification(), conn) > 0) {
                        insertFoodEnergyComposition(composition, conn);
                    }
                    System.out.println(type);
                }
            }
        }
    }

    /**
     * Fills {@code composition} from the rows of the first {@code details_table} on the current
     * page. Row 1 is the edible part, rows 2-9 are moisture, energy, protein, fat, cholesterol,
     * ash, carbohydrate and dietary fiber, in that order.
     */
    private static void readComposition(WebDriver driver, FoodEnergyComposition composition) {
        List<WebElement> rows = driver.findElements(By.xpath("//div[@class='details_table'][1]//tr"));
        composition.setEdible(readAmount(rows, 1, "%"));
        composition.setEdibleUnit("%");
        composition.setMoistureContent(readAmount(rows, 2, "g"));
        composition.setMoistureContentUnit("g");
        composition.setEnergy(readAmount(rows, 3, "KJ"));
        composition.setEnergyUnit("KJ");
        composition.setProtein(readAmount(rows, 4, "g"));
        composition.setProteinUnit("g");
        composition.setFat(readAmount(rows, 5, "g"));
        composition.setFatUnit("g");
        // NOTE(review): cholesterol is parsed and stored with unit "g"; if the page reports it
        // in another unit (e.g. "mg") the parse fails and 0.0 is stored - verify on the site.
        composition.setCholesterol(readAmount(rows, 6, "g"));
        composition.setCholesterolUnit("g");
        composition.setAshContent(readAmount(rows, 7, "g"));
        composition.setAshContentUnit("g");
        composition.setCarbohydrate(readAmount(rows, 8, "g"));
        composition.setCarbohydrateUnit("g");
        composition.setDietaryFiber(readAmount(rows, 9, "g"));
        composition.setDietaryFiberUnit("g");
    }

    /**
     * Reads the number in front of {@code unit} from the second cell of row {@code rowIndex}.
     *
     * @return the parsed value, or {@code 0.0} when the row or cell is missing or the text is
     *         not a number (best effort: a single bad cell must not abort the crawl)
     */
    private static BigDecimal readAmount(List<WebElement> rows, int rowIndex, String unit) {
        try {
            List<WebElement> cells = rows.get(rowIndex).findElements(By.xpath("./*"));
            String text = cells.get(1).getText();
            int unitAt = text.indexOf(unit);
            String number = unitAt >= 0 ? text.substring(0, unitAt) : text;
            // BigDecimal rejects surrounding whitespace, so strip it before parsing.
            return new BigDecimal(number.trim());
        } catch (RuntimeException e) {
            return ZERO;
        }
    }

    /**
     * Builds a numeric identifier from the current time ({@code yyyyMMddHHmmss}) followed by
     * four random decimal digits. Despite the name this is not an RFC 4122 UUID, and two calls
     * within the same second can collide when they draw the same four digits.
     *
     * @return an 18-character string of decimal digits
     */
    public static String getRandomUUID() {
        // 1. Timestamp prefix.
        StringBuilder id = new StringBuilder(new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()));
        // 2. Four random digits. nextInt(10) is always in 0..9, unlike Math.abs(nextInt()) % 10,
        //    which is negative when nextInt() returns Integer.MIN_VALUE.
        Random random = new Random();
        for (int k = 0; k < 4; k++) {
            id.append(random.nextInt(10));
        }
        // 3. Return the assembled identifier.
        return id.toString();
    }
}
// Java + XPath crawler for scraping food data
// First published 2023-05-17 17:46:48