以成都地铁官微为例,用爬虫爬取其微博粉丝数量:
# pyautogui通过模板匹配,找到新浪微博账户显示粉丝数量的位置,
# 然后双击使得数字处于选中状态,再用pyperclip获取粘贴板数字。
import time
import pandas as pd
from tqdm import tqdm
import pyautogui
import webbrowser as wb
from selenium import webdriver
import pyperclip
# 启动程序前先打开浏览器,且使浏览器窗口最大化。
def get_city_data(city):
    """Scrape the Weibo follower count of one account via GUI automation.

    A maximized browser window must already be open before this runs, and
    'loc.png' must be a screenshot (taken on this machine/resolution) of a
    fixed UI landmark sitting just below the follower-count number.

    Args:
        city: tuple of (display name, weibo uid string).

    Returns:
        Tuple of (name, follower_count:int, timestamp 'YYYY-MM-DD HH:MM').

    Raises:
        RuntimeError: if the template image cannot be located on screen.
        ValueError: if the clipboard text cannot be parsed as an integer.
    """
    url = f'https://weibo.com/{city[1]}'
    wb.open(url=url)
    # Give the page time to render before template matching.
    pyautogui.sleep(10)
    # Template-match loc.png on screen; different machines/resolutions
    # need their own capture of loc.png.
    locate = pyautogui.locateOnScreen('loc.png')
    if locate is None:
        # Fail loudly instead of letting pyautogui.center() crash on None.
        # NOTE(review): newer pyautogui raises ImageNotFoundException
        # instead of returning None — confirm installed version.
        raise RuntimeError('template image loc.png not found on screen')
    center_x, center_y = pyautogui.center(locate)
    # Vertical offset (pixels) from the landmark up to the number itself.
    OFFSET_Y = 38
    pyautogui.moveTo(center_x, center_y - OFFSET_Y)
    pyautogui.sleep(1)
    # Double-click selects the number, then copy it to the clipboard.
    pyautogui.doubleClick()
    pyautogui.sleep(1)
    pyautogui.hotkey('ctrl', 'c')
    num = pyperclip.paste()
    # Strip thousands separators / stray whitespace before converting,
    # so values copied as e.g. '1,234,567' still parse.
    fans = int(num.replace(',', '').strip())
    return city[0], fans, time.strftime('%Y-%m-%d %H:%M', time.localtime())
def main():
    """Scrape follower counts for each configured account, print a ranked
    table, and save it as Excel and CSV."""
    city = [
        ('成都', '2384889627'),
    ]
    city_data = []
    pbar = tqdm(total=len(city), leave=True)
    for c in city:
        result = get_city_data(c)
        print(result)
        city_data.append(list(result))
        pbar.update(1)
    # Close the bar so later prints are not interleaved with it.
    pbar.close()
    col = ['城市', '粉丝数量', '统计时间']
    df = pd.DataFrame(data=city_data, columns=col)
    # Rank by follower count, descending.
    df = df.sort_values(by=col[1], ascending=False)
    # Sorting leaves the original index order; rebuild it sequentially
    # and shift it to start at 1 instead of pandas' default 0.
    df = df.reset_index(drop=True)
    df.index = df.index + 1
    print(df.head(10))
    # Modern pandas dropped both the `encoding` kwarg of to_excel
    # (removed in 2.0) and the legacy .xls/xlwt writer, so write .xlsx.
    df.to_excel('city.xlsx')
    df.to_csv('city.csv', encoding='utf-8')
if __name__ == '__main__':
    main()
输出: