爬虫之 WebDriver 基本使用以及滑动验证码处理

package com.zt.taobao.personal;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.interactions.Actions;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;

import com.zt.components.util.RegexUtil;
import com.zt.config.spring.FileStoreConfig;
import com.zt.taobao.util.ZipFileUtil;

/**
 * Collects Taobao "userid" data by paging through the Alimama search endpoint
 * with a Selenium-driven Chrome browser and archiving each result page as a zip.
 *
 * @date 2018-08-07
 * @author lizhifeng
 */
@Controller
@RequestMapping("XXXXXXX")
public class TaoBaoApiUserIdTest {

	/** Login page; its URL appearing in a response is treated as "not logged in". */
	private static final String ALIMAMA_LOGIN_URL = "https://www.alimama.com/member/login.htm";

	/** Search URL template: the first %s is the keyword line, the second %s the page number. */
	private static final String SEARCH_URL_TEMPLATE = "https://pub.alimama.com/items/search.json?q=%s&toPage=%s&perPageSize=100";

	/**
	 * Page number the crawl jumps to when a login is triggered.
	 * NOTE(review): this looks like a leftover manual resume point; kept as-is to
	 * preserve behavior — confirm whether it should be removed.
	 */
	private static final int PAGE_AFTER_LOGIN = 4979;

	/** Element id of the slider handle on the verification page. */
	private static final String SLIDER_ID = "nc_1_n1z";

	@Autowired
	private FileStoreConfig fileConfig;

	/**
	 * Runs the whole crawl: starts Chrome, iterates over every keyword line of every
	 * file in the configured type directory, and always shuts the browser down at the end.
	 */
	@RequestMapping("run")
	public void getUserId() {
		System.setProperty("webdriver.chrome.driver", "D:/selenium/chromedriver.exe");
		ChromeOptions options = new ChromeOptions();
		// Headless mode stays off: the login step asks the operator to scan a code in the browser.
		options.addArguments("--disable-infobars");
		options.addArguments("--user-data-dir=C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\User Data");
		// The options must be handed to the driver, otherwise none of the arguments above apply.
		WebDriver driver = new ChromeDriver(options);

		try {
			driver.manage().window().maximize();
			crawl(driver);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InterruptedException e) {
			// Restore the interrupt flag so the calling thread can observe the interruption.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		} finally {
			// Release the browser and the chromedriver process on every exit path.
			driver.quit();
		}

		System.out.println("-----------------------------------------------------");
		System.out.println("-					采集任务结束,请关闭!					-");
		System.out.println("-----------------------------------------------------");
	}

	/**
	 * Iterates over all keyword files and pages through the search results of each keyword.
	 *
	 * @param driver the running browser session
	 * @throws IOException if the keyword directory cannot be listed or a file cannot be read/written
	 * @throws InterruptedException if the thread is interrupted while waiting
	 */
	private void crawl(WebDriver driver) throws IOException, InterruptedException {
		JavascriptExecutor je = (JavascriptExecutor) driver;
		// Remember the initial window explicitly; the handle set has no guaranteed order.
		String mainHandle = driver.getWindowHandle();

		String taoBaoApiTypeDir = fileConfig.getTaoBaoApiTypeDir();
		String taoBaoApiUserIdDir = fileConfig.getTaoBaoApiUserIdDir();

		File[] keywordFiles = new File(taoBaoApiTypeDir).listFiles();
		if (keywordFiles == null) {
			// listFiles() returns null when the path is not a directory or cannot be read.
			throw new IOException("Cannot list files in directory: " + taoBaoApiTypeDir);
		}

		int pageNum = 1; // current page number, reset to 1 whenever a keyword is finished
		for (File txtFile : keywordFiles) {
			if (!txtFile.isFile()) {
				continue; // sub-directories cannot be read as line files
			}
			List<String> lines = FileUtils.readLines(txtFile, "UTF-8");
			System.out.println(lines.size());
			for (String lineText : lines) {
				if (StringUtils.isBlank(lineText)) {
					continue; // an empty keyword yields a meaningless query
				}
				while (true) {
					Set<String> handles = driver.getWindowHandles();
					if (handles.size() >= 3) {
						// Previously this state reloaded the page in a tight endless loop.
						System.out.println("请关闭多余的浏览器窗口!!!");
						Thread.sleep(10 * 1000);
						continue;
					}
					focusMainWindow(driver, handles, mainHandle);

					String pageUrl = String.format(SEARCH_URL_TEMPLATE, lineText, pageNum);
					driver.get(pageUrl);
					System.out.println(pageUrl);
					String html = driver.findElement(By.tagName("html")).getAttribute("outerHTML");

					if (handles.size() == 1 && StringUtils.contains(html, ALIMAMA_LOGIN_URL)) {
						// Not logged in: open the login page in a second window and wait for the operator.
						pageNum = PAGE_AFTER_LOGIN;
						je.executeScript("window.open(arguments[0])", ALIMAMA_LOGIN_URL);
						System.out.println("请扫码登录!!!");
						Thread.sleep(50 * 1000);
					} else if (StringUtils.contains(html, "x5step=2")) {
						// Slider verification requested: solve it in its own window, then retry the page.
						solveSlider(driver, je, html, mainHandle);
					} else if (StringUtils.contains(html, "sellerId")) {
						savePage(taoBaoApiUserIdDir, lineText, pageNum, html);
						pageNum++;
					} else if (StringUtils.contains(html, "你可以先逛逛")) {
						// Throttled by the site: back off for five minutes, then retry the same page.
						Thread.sleep(300000);
					} else {
						// Nothing recognisable: treat the keyword as exhausted.
						pageNum = 1;
						break;
					}
				}
			}
		}
	}

	/**
	 * Switches the driver to the main window, falling back to any open window
	 * if the main one no longer exists.
	 */
	private void focusMainWindow(WebDriver driver, Set<String> handles, String mainHandle) {
		if (handles.contains(mainHandle)) {
			driver.switchTo().window(mainHandle);
		} else if (!handles.isEmpty()) {
			driver.switchTo().window(handles.iterator().next());
		}
	}

	/**
	 * Opens the verification URL found in the response in a new window, drags the
	 * slider there, waits, then closes that window and returns focus to the main one.
	 *
	 * @param driver the running browser session
	 * @param je script executor of the same session
	 * @param html response markup that contains the verification URL
	 * @param mainHandle handle of the main crawl window
	 * @throws InterruptedException if the thread is interrupted while waiting
	 */
	private void solveSlider(WebDriver driver, JavascriptExecutor je, String html, String mainHandle)
			throws InterruptedException {
		String hkUrl = RegexUtil.get(html, "\"url\": \"(.*?)\"", 1);
		if (StringUtils.isBlank(hkUrl)) {
			// No verification URL could be extracted; pause before the caller retries.
			Thread.sleep(10000);
			return;
		}

		Set<String> handlesBefore = driver.getWindowHandles();
		// Pass the URL as a script argument so quotes in page-supplied text cannot break or inject script.
		je.executeScript("window.open(arguments[0])", hkUrl);
		System.out.println("请拖动滑块!!!");

		String sliderHandle = null;
		for (String handle : driver.getWindowHandles()) {
			if (!handlesBefore.contains(handle)) {
				sliderHandle = handle;
				break;
			}
		}
		if (sliderHandle == null) {
			// The new window did not open (e.g. blocked); pause before the caller retries.
			Thread.sleep(10000);
			return;
		}

		try {
			// The slider lives in the newly opened window, not in the main one.
			driver.switchTo().window(sliderHandle);

			// Poll briefly for the slider, since the new window may still be loading.
			List<WebElement> sliders = driver.findElements(By.id(SLIDER_ID));
			for (int attempt = 0; sliders.isEmpty() && attempt < 20; attempt++) {
				Thread.sleep(500);
				sliders = driver.findElements(By.id(SLIDER_ID));
			}

			if (!sliders.isEmpty()) {
				// [locate] the slider element
				WebElement button = sliders.get(0);

				// Define the drag trajectory
				Actions action = new Actions(driver);
				action.clickAndHold(button);
				action.moveByOffset(130, 0).pause(500);
				action.moveByOffset(130, 0).pause(500);
				action.moveToElement(button).release();
				// Execute the drag
				action.build().perform();
			}
			// Leave time for the verification to complete (or for the operator to drag manually).
			Thread.sleep(10000);
		} finally {
			// Close the verification window so the window count cannot grow without bound.
			Set<String> remaining = driver.getWindowHandles();
			if (remaining.contains(sliderHandle)) {
				driver.switchTo().window(sliderHandle);
				driver.close();
			}
			if (remaining.contains(mainHandle)) {
				driver.switchTo().window(mainHandle);
			}
		}
	}

	/**
	 * Writes the page markup to a text file under a per-day directory, compresses it
	 * into a zip next to it and removes the text file.
	 *
	 * @param userIdDir base output directory
	 * @param lineText keyword the page belongs to (used in the file name)
	 * @param pageNum page number (used in the file name)
	 * @param html markup to archive
	 * @throws IOException if writing or deleting the text file fails
	 */
	private void savePage(String userIdDir, String lineText, int pageNum, String html) throws IOException {
		String nowDate = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
		String rootDir = userIdDir + nowDate + "/";
		File htmlTxt = new File(rootDir + lineText + "-" + pageNum + ".txt");
		File zipFile = new File(rootDir + lineText + "-" + pageNum + ".zip");
		// Write the text file
		FileUtils.write(htmlTxt, html, "UTF-8");
		// Write the zip archive
		ZipFileUtil.doCompress(htmlTxt, zipFile);
		// Delete the text file
		FileUtils.forceDelete(htmlTxt);
		System.out.println("--- 打包成功 ---> " + lineText + "-" + pageNum + "----");
	}

}

 

  • 0
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值