import csv
import time, re, random
from io import StringIO

import PIL.Image as image
import requests
from PIL import Image, ImageEnhance
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# 保存信息
def towrite(item, filename='weibo.csv'):
    """Append one record to a CSV file as a single row.

    Args:
        item: Iterable of field values that make up the row.
        filename: Path of the CSV file to append to. Defaults to
            ``'weibo.csv'`` in the current working directory.

    A failure while writing the row is reported on stdout and otherwise
    ignored, so one bad record does not abort the caller. Errors raised
    while opening the file are not caught here.
    """
    # newline='' hands line-ending control to the csv module; without it
    # every row is followed by an empty line on Windows.
    with open(filename, 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        try:
            writer.writerow(item)
        except (csv.Error, TypeError, ValueError, OSError) as err:
            # ValueError also covers UnicodeEncodeError for text that cannot
            # be encoded as UTF-8 (e.g. lone surrogates).
            print("writer error!", err)
# # 对比RGB值
# def is_similar(image1, image2, x, y):
# # 获取指定位置的RGB值
# pixel1 = image1.getpixel((x,y))
# pixel2 = image2.getpixel((x,y))
# for i in range(0,3):
# # 如果相差超过50则认为找到缺口位置
# if abs(pixel1[i]-pixel2[i]) >= 50:
# return False
# return True
#
# # 计算缺口位置
# def get_diff_location(image1, image2):
# i = 0
# # 两张原始图的大小都是相同的260*160
# # 通过两个for循环依次对比RGB值
# # 如果相差50则认为找到了缺口的位置
# for i in range(62, 260):
# for j in range(0, 160):
# if is_similar(image1, image2, i, j) == False:
# return i
# 主函数
_LOGIN_URL = "https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=http%3A%2F%2Fm.weibo.cn%2F"
_CAPTCHA_XPATH = '//*[@id="embed-captcha"]/div/div[2]/div[1]/div[3]'
_FEED_XPATH = '//*[@id="app"]/div[1]/div[2]/div[2]'
# Locators relative to a feed container. The author locator must resolve to
# an element: Selenium rejects XPath expressions that end in text(), so the
# text is read through the element's .text attribute instead.
_AUTHOR_XPATH = 'div/div/div/header/div[2]/div/a/h3'
_CONTENT_XPATH = 'div/div/article/div/div/div[1]'


def _login(driver, username, password):
    """Open the login page, type the credentials and submit the form."""
    driver.get(_LOGIN_URL)
    driver.implicitly_wait(60)  # element lookups wait up to 60 s
    time.sleep(5)
    driver.find_element(By.XPATH, '//*[@id="loginName"]').send_keys(username)
    driver.find_element(By.XPATH, '//*[@id="loginPassword"]').send_keys(password)
    time.sleep(5)
    driver.find_element(By.XPATH, '//*[@id="loginAction"]').click()
    time.sleep(2)


def _click_captcha(driver):
    """Click the captcha widget if it is present, then pause for 5 seconds."""
    # Only the initial click is automated. An earlier attempt at solving the
    # slider captcha (screenshot the two canvases, diff them to locate the
    # gap, drag the knob with ActionChains) was disabled and is not
    # reproduced here.
    try:
        driver.find_element(By.XPATH, _CAPTCHA_XPATH).click()
    except WebDriverException as err:
        # Best effort: report the problem and carry on without the click.
        print(err)
    time.sleep(5)


def _scrape_posts(driver):
    """Save the author and text of the first post in each feed container."""
    try:
        all_weibo = driver.find_elements(By.XPATH, _FEED_XPATH)
        print("all_weibo:", all_weibo)
        for weibo in all_weibo:
            authors = weibo.find_elements(By.XPATH, _AUTHOR_XPATH)
            bodies = weibo.find_elements(By.XPATH, _CONTENT_XPATH)
            if not authors or not bodies:
                # Nothing matched inside this container; report and move on
                # instead of failing on an empty list.
                print("爬取失败!")
                continue
            # NOTE(review): only the first match per container is saved, as
            # in the original code; confirm whether every post is wanted.
            towrite([authors[0].text, bodies[0].text])
    except (WebDriverException, OSError) as err:
        print("爬取失败!", err)


def main(username='', password=''):
    """Log in to Weibo through Chrome and save post data to CSV.

    Args:
        username: Account name typed into the login form. The default
            empty string types nothing, so a real value must be passed
            for the login to succeed.
        password: Password typed into the login form (same default).

    The steps are: log in, click the captcha widget if one is shown, then
    read the feed and hand each record to ``towrite``. Selenium and file
    errors during those steps are printed rather than raised. The browser
    is always closed on exit.
    """
    driver = webdriver.Chrome()
    try:
        _login(driver, username, password)
        _click_captcha(driver)
        _scrape_posts(driver)
    finally:
        # Release the browser and the chromedriver process even on failure.
        driver.quit()
# 主函数入口
if __name__ == '__main__':
    # Run the scraper only when the file is executed as a script, not when
    # it is imported as a module.
    main()
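# Scraping Sina Weibo with Selenium
# (Blog page residue) Latest recommended article published 2024-06-21 17:35:49
# (Blog page residue) Keywords generated by CSDN using intelligent technology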