![](https://img-blog.csdnimg.cn/20201014180756925.png?x-oss-process=image/resize,m_fixed,h_64,w_64)
python
都护小弟弟
这个作者很懒,什么都没留下…
展开
-
requests + bs4 爬取数据并存入数据库
import pymysqlimport requestsfrom bs4 import BeautifulSoupfrom lxml import etreeimport timeheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',}def bq原创 2021-07-06 08:51:53 · 308 阅读 · 0 评论 -
python 滑块验证
from selenium import webdriverimport timefrom selenium.webdriver import ActionChainsurl = '**********'driver = webdriver.Chrome( r"D:\chromedriver.exe")driver.get(url)user = driver.find_element_by_id('fm-login-id').send_keys('******')time.sleep原创 2021-07-04 20:39:30 · 203 阅读 · 1 评论 -
python 数据保存excel
# coding=UTF-8import xlrdimport xlwtfrom xlutils.copy import copyfrom lxml import etreeimport timefrom selenium import webdriverfrom bs4 import BeautifulSoupdef xls_append(path, value): index = len(value) workbook = xlrd.open_workbook(pat原创 2020-11-29 11:36:14 · 188 阅读 · 0 评论 -
scrapy案例 爬取数据保存到excel
# -*- coding: utf-8 -*-import scrapyclass Mkw1Item(scrapy.Item): # define the fields for your item here like: img = scrapy.Field() title = scrapy.Field() type = scrapy.Field() pic = scrapy.Field()# -*- coding: utf-8 -*-import s原创 2020-11-28 11:19:46 · 791 阅读 · 0 评论 -
通过读取excel中的地址搜索经纬度并提交(selenium)
wb = openpyxl.load_workbook('门店位置信息.xlsx')ws = wb.active # 当前活跃的表单rows = ws.rowscolumns = ws.columnsurl = "https://jingweidu.51240.com/"driver = webdriver.Chrome(r"D:\chromedriver.exe")driver.get(url)va = []def cha(v, num_v): if '路' in v:原创 2020-11-14 10:31:05 · 613 阅读 · 0 评论 -
scrapy 爬取数据保存到数据库
items.py# -*- coding: utf-8 -*-# Define here the models for your scraped items## See documentation in:# https://doc.scrapy.org/en/latest/topics/items.htmlimport scrapyclass MkwItem(scrapy.Item): link = scrapy.Field() # 图片 type = scrapy.原创 2020-11-14 10:22:15 · 441 阅读 · 0 评论 -
抓取页面并保存到本地(未完成)
# http://47.89.12.158:8080/js-camel-milk/import requestsfrom bs4 import BeautifulSoupfrom lxml import etreeimport timeurl = 'http://47.89.12.158:8080/js-camel-milk/'html = requests.get(url).contenthtm = BeautifulSoup(html, 'lxml')link = htm.select原创 2020-11-29 09:43:40 · 104 阅读 · 0 评论 -
python 自动点击输入登录
from selenium import webdriverurl = "https://www.qidian.com/"headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',}driver = webdriver.Chrome( r"D:\Anaco原创 2020-05-11 02:31:22 · 565 阅读 · 0 评论 -
python urllib抓取豆瓣影评
from urllib import requestfrom lxml import etreeheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'}urls = ['https://movie.douban.com/review/best/?start={}'原创 2020-05-09 04:11:39 · 283 阅读 · 0 评论 -
python 多进程抓取数据存入txt
import requestsfrom lxml import etreefrom bs4 import BeautifulSoupimport timefrom selenium import webdriverimport sysfrom multiprocessing import Process, Locksys.setrecursionlimit(8000)heade...原创 2020-05-05 15:45:11 · 393 阅读 · 0 评论 -
python xpath匹配 + selenium模拟点击 保存到mysql
import requestsfrom lxml import etreeimport timefrom selenium import webdriverimport pymysqlimport syssys.setrecursionlimit(8000)header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Wi...原创 2020-05-04 16:07:45 · 290 阅读 · 0 评论 -
python xpath匹配 + selenium模拟点击 保存到txt
import requestsfrom lxml import etreeimport timefrom selenium import webdriverimport syssys.setrecursionlimit(8000)header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...原创 2020-05-04 13:46:56 · 718 阅读 · 0 评论 -
python requests爬取图片保存本地
import requestsimport lxmlfrom bs4 import BeautifulSoupimport timeimport osheader = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,applica...原创 2020-05-03 02:06:21 · 344 阅读 · 0 评论