抓取微信群聊人数,并保存到excel表格

操作流程:

1. 通过selenium调起微信web页面;

2. 扫描二维码,登录;

3. 遍历列表,并抓取群聊人数;

4. 将结果写入excel表格。


注:代码依赖 selenium 库和 excel 操作库(jxl),请自行下载并添加到项目的运行库(classpath)中;chromedriver 请下载后放到代码中 driverDir 指定的目录(默认为 D:\bak\chromedriver.exe,详见代码)。


代码:

package com.yang.selenium;


import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Keys;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;


import jxl.Workbook;
import jxl.read.biff.BiffException;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;


/**
 * Counts the members of every WeChat group chat through the WeChat web client
 * and exports the result to an Excel (.xls) sheet.
 *
 * <p>Flow: open the web client with Selenium, wait until the user has logged in
 * by scanning the QR code, walk the contact list so every contact is opened once
 * (which adds it to the chat list), then walk the chat list reading each group's
 * member count, and finally write the report.
 */
public class Chat {

    /** Number of list steps between two "has the selection stopped changing?" checks. */
    private static final int CHECK_INTERVAL = 20;

    /** Consecutive unchanged checks that are taken to mean "end of list reached". */
    private static final int END_CONFIRMATIONS = 5;

    /**
     * Program entry point: drives the whole scrape and export.
     *
     * @param args unused
     * @throws WriteException       if the Excel sheet cannot be written
     * @throws IOException          if the report directory or file cannot be created
     * @throws InterruptedException if a wait between UI steps is interrupted
     * @throws BiffException        declared by the report helpers
     */
    public static void main(String[] args) throws WriteException, IOException,
            InterruptedException, BiffException {
        String resultDir = "c:\\test\\";
        String driverDir = "D:\\bak\\" + "chromedriver.exe";
        System.setProperty("webdriver.chrome.driver", driverDir);

        WebDriver wd = new ChromeDriver();
        try {
            wd.get("https://web2.wechat.com/");
            // wd.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
            By chatSelector = By
                    .cssSelector("div[class='chat_item slide-left ng-scope']");

            // The chat list only appears after a successful QR-code login.
            while (!checkWebElementExist(wd, chatSelector)) {
                System.out.println("请扫码二维码,并等待加载完成后运行。。。");
                Thread.sleep(1000);
            }

            By tabContactSelector = By
                    .cssSelector("div[class='tab_item no_extra']");
            By sendMessageSelector = By.cssSelector("a[class='button']");
            // Switch to the contacts tab (group chats and friends).
            wd.findElement(tabContactSelector).click();

            System.out.println("***********************************************");
            System.out.println("**********  请不要改动代码,否则可能影响运行结果       **********");
            System.out.println("**********  如运行结果有偏差,请联系技术人员排查       **********");
            System.out.println("********  数据加载中,15秒(加载时间)之后将开始运行      ********");
            System.out.println("***********************************************");
            System.out.println();
            System.out.println();
            System.out.println();
            Thread.sleep(15000);

            SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String startTime = df.format(new Date());
            System.out.println("开始运行: " + startTime);
            System.out.println();

            // Select the first contact so the arrow keys have a starting point.
            wd.findElement(By.cssSelector("h4[class='nickname ng-binding']"))
                    .click();
            Thread.sleep(300);

            loadContactList(wd);
            System.out.println("通讯录 加载 完成时间: " + df.format(new Date()));

            addContactsToChatList(wd, sendMessageSelector, tabContactSelector,
                    CHECK_INTERVAL);
            System.out.println("通讯录 点击 完成时间: " + df.format(new Date()));

            wd.findElement(By.cssSelector("i[class='web_wechat_tab_chat']"))
                    .click();
            begainCount(wd, resultDir, CHECK_INTERVAL);

            System.out.println("导出数据成功!!!");

            String endTime = df.format(new Date());
            System.out.println("***********************************************");
            System.out.println("开始运行时间: " + startTime);
            System.out.println("结束运行时间: " + endTime);
            Thread.sleep(60000);
        } finally {
            // quit() ends the whole driver session; close() would only close the
            // current window and leave the chromedriver process behind.
            wd.quit();
        }
    }

    /**
     * Walks the contact list down and then back up with the arrow keys so the
     * page loads the complete list. A direction is finished once the displayed
     * value has stayed the same for nine consecutive key presses.
     */
    private static void loadContactList(WebDriver wd)
            throws InterruptedException {
        boolean up = false;
        String latestName = "-";
        for (int pass = 0; pass < 2; pass++) {
            int unchanged = 0;
            while (unchanged < 9) {
                String lastName = latestName;
                move(wd, up);
                // Trim on read so both sides of the comparison are normalised.
                latestName = getValue(wd).trim();
                unchanged = latestName.equals(lastName) ? unchanged + 1 : 0;
            }
            up = true;
        }
    }

    /**
     * Opens every contact once via {@link #doClick} so it is added to the chat
     * list. Every {@code counter} steps the displayed value is compared before
     * and after one more step; {@link #END_CONFIRMATIONS} consecutive unchanged
     * comparisons end the walk.
     */
    private static void addContactsToChatList(WebDriver wd,
            By sendMessageSelector, By tabContactSelector, int counter)
            throws InterruptedException {
        int steps = 0;
        int confirmations = 0;
        while (steps < counter) {
            steps++;
            doClick(wd, sendMessageSelector, tabContactSelector);
            if (steps == counter) {
                Thread.sleep(1000);
                String before = getValue(wd).trim();
                doClick(wd, sendMessageSelector, tabContactSelector);
                if (before.equals(getValue(wd).trim())) {
                    confirmations++;
                    // Stay one step short of the interval so the next iteration
                    // re-checks instead of leaving the loop after a single match.
                    steps = counter - 1;
                } else {
                    steps = 0;
                    confirmations = 0;
                }
                if (confirmations == END_CONFIRMATIONS) {
                    break;
                }
            }
        }
    }

    /**
     * Walks the chat list down and then up, records the member count of every
     * chat that shows one, and writes the collected data to the Excel report.
     *
     * @param wd        driver showing the chat tab
     * @param resultDir base directory for the report
     * @param counter   number of steps between two end-of-list checks
     */
    public static void begainCount(WebDriver wd, String resultDir, int counter)
            throws InterruptedException, RowsExceededException, WriteException,
            IOException, BiffException {
        By titleCountSelector = By
                .cssSelector("span[class='title_count ng-binding ng-scope']");
        HashMap<String, String> map = new HashMap<String, String>();

        int k = 0;
        int j = 0;
        String titleName = "-";
        boolean up = false;
        for (int p = 0; p < 2; p++) {
            while (k < counter) {
                k++;
                String previousName = titleName.trim();
                // Only chats that display a member count are recorded.
                if (checkWebElementExist(wd, titleCountSelector)) {
                    String titleCount = getCount(wd);
                    titleName = getName(wd);
                    System.out.println("群聊 -> " + titleName + " : "
                            + titleCount);
                    map.put(titleName, titleCount);
                }

                if (k == counter) {
                    Thread.sleep(100);
                    titleName = getName(wd);
                    if (previousName.equals(titleName.trim())) {
                        j++;
                        k = counter - 1; // re-check on the very next step
                    } else {
                        j = 0;
                        k = 0;
                    }
                    if (j == END_CONFIRMATIONS) {
                        // End of list reached: reset and walk back up.
                        k = 0;
                        j = 0;
                        up = true;
                        break;
                    }
                }
                move(wd, up);
                Thread.sleep(500);
            }
        }
        writeExcel(resultDir, map);
    }

    /**
     * Returns the text of the first element with class {@code value ng-binding}.
     */
    public static String getValue(WebDriver wd) {
        JavascriptExecutor js = (JavascriptExecutor) wd;
        return (String) js
                .executeScript("return document.getElementsByClassName(\"value ng-binding\")[0].innerText");
    }

    /**
     * Returns the chat title: the text of the first element with class
     * {@code title_name ng-binding}.
     */
    public static String getName(WebDriver wd) {
        JavascriptExecutor js = (JavascriptExecutor) wd;
        return (String) js
                .executeScript("return document.getElementsByClassName(\"title_name ng-binding\")[0].innerText");
    }

    /**
     * Returns the member count shown next to the chat title, i.e. the text
     * between the first "(" and the last ")" of the count element.
     */
    public static String getCount(WebDriver wd) {
        JavascriptExecutor js = (JavascriptExecutor) wd;
        String raw = (String) js
                .executeScript("return document.getElementsByClassName(\"title_count ng-binding ng-scope\")[0].innerText;");
        return extractCount(raw);
    }

    /**
     * Extracts the text between the first "(" and the last ")". Falls back to the
     * trimmed input when the parentheses are missing or out of order, instead of
     * failing with a StringIndexOutOfBoundsException.
     */
    private static String extractCount(String raw) {
        if (raw == null) {
            return "";
        }
        int open = raw.indexOf("(");
        int close = raw.lastIndexOf(")");
        if (open >= 0 && close > open) {
            return raw.substring(open + 1, close).trim();
        }
        return raw.trim();
    }

    /**
     * Tells whether at least one element matching {@code selector} is present.
     */
    public static boolean checkWebElementExist(WebDriver wd, By selector) {
        try {
            wd.findElement(selector);
            return true;
        } catch (NoSuchElementException e) {
            return false;
        }
    }

    /**
     * Sends one arrow-key press to the page and pauses 30 ms.
     *
     * @param up {@code true} for arrow-up, {@code false} for arrow-down
     */
    public static void move(WebDriver wd, boolean up)
            throws InterruptedException {
        Keys key = up ? Keys.ARROW_UP : Keys.ARROW_DOWN;
        new Actions(wd).sendKeys(key).build().perform();
        Thread.sleep(30);
    }

    /**
     * Writes the collected group names and member counts to a new .xls file
     * named after the current minute, inside the dated report directory, and
     * appends a total row.
     *
     * @param resultDir base directory for reports
     * @param map       group name to member count (as text)
     */
    public static void writeExcel(String resultDir, HashMap<String, String> map)
            throws InterruptedException, IOException, RowsExceededException,
            WriteException, BiffException {
        String fileName = new SimpleDateFormat("yyyy_MM_dd_HH_mm")
                .format(new Date()) + ".xls";
        File file = new File(mkDateDir(resultDir), fileName);

        // createWorkbook overwrites an existing file, so no explicit delete is needed.
        WritableWorkbook book = Workbook.createWorkbook(file);
        try {
            WritableSheet sheet = book.createSheet("群聊人数统计", 0);
            sheet.addCell(new Label(0, 0, "小区名"));
            sheet.addCell(new Label(1, 0, "人数"));

            System.out.println("总结: ");
            int row = 1;
            int sum = 0;
            for (String key : map.keySet()) {
                String count = map.get(key);
                sheet.addCell(new Label(0, row, key));
                sheet.addCell(new Label(1, row, count));
                row++;
                sum += parseCountOrZero(key, count);
                System.out.println(key + " : " + count);
            }
            sheet.addCell(new Label(0, row, "总数"));
            sheet.addCell(new Label(1, row, Integer.toString(sum)));

            book.write();
        } finally {
            // Release the file handle even when a cell could not be written.
            book.close();
        }
    }

    /**
     * Parses a member count, returning 0 (with a warning on stderr) when the
     * text is not a number, so one odd entry does not abort the export.
     */
    private static int parseCountOrZero(String name, String count) {
        try {
            return Integer.parseInt(count.trim());
        } catch (NumberFormatException e) {
            System.err.println("警告: 无法解析人数, 已按0计入总数 -> " + name + " : "
                    + count);
            return 0;
        } catch (NullPointerException e) {
            System.err.println("警告: 无法解析人数, 已按0计入总数 -> " + name + " : "
                    + count);
            return 0;
        }
    }

    /**
     * Creates (if necessary) the report directory for today, named
     * {@code yyyy_MM_dd}, below {@code reportDir}, including any missing parents.
     *
     * @param reportDir base directory for reports
     * @return absolute path of today's report directory
     * @throws IOException if the directory does not exist and cannot be created
     */
    public static String mkDateDir(String reportDir) throws BiffException,
            IOException {
        String timeDir = new SimpleDateFormat("yyyy_MM_dd").format(new Date());
        // Let File join the two parts instead of hand-writing a separator.
        File dir = new File(reportDir, timeDir);
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create report directory: "
                    + dir.getAbsolutePath());
        }
        return dir.getAbsolutePath();
    }

    /**
     * Performs one contact-list step: clicks the "send message" button (waiting
     * up to 10 seconds for it), clicks the contacts tab again, and presses
     * arrow-down once.
     */
    public static void doClick(WebDriver wd, final By sendMessageSelector,
            By tabContactSelector) throws InterruptedException {
        WebDriverWait wait = new WebDriverWait(wd, 10);
        WebElement sendMessage = wait.until(new ExpectedCondition<WebElement>() {
            public WebElement apply(WebDriver d) {
                return d.findElement(sendMessageSelector);
            }
        });
        sendMessage.click();
        wd.findElement(tabContactSelector).click();
        Thread.sleep(50);
        move(wd, false);
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值