- 博客(14)
- 收藏
- 关注
原创 request+bs4 爬取数据存放数据库
import pymysqlimport requestsfrom bs4 import BeautifulSoupfrom lxml import etreeimport timeheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',}def bq
2021-07-06 08:51:53 341
原创 python 滑块验证
from selenium import webdriverimport timefrom selenium.webdriver import ActionChainsurl = '**********'driver = webdriver.Chrome( r"D:\chromedriver.exe")driver.get(url)user = driver.find_element_by_id('fm-login-id').send_keys('******')time.sleep
2021-07-04 20:39:30 224 1
原创 python 数据保存excel
# coding=UTF-8import xlrdimport xlwtfrom xlutils.copy import copyfrom lxml import etreeimport timefrom selenium import webdriverfrom bs4 import BeautifulSoupdef xls_append(path, value): index = len(value) workbook = xlrd.open_workbook(pat
2020-11-29 11:36:14 209
原创 抓取页面并保存到本地(未完成)
# http://47.89.12.158:8080/js-camel-milk/import requestsfrom bs4 import BeautifulSoupfrom lxml import etreeimport timeurl = 'http://47.89.12.158:8080/js-camel-milk/'html = requests.get(url).contenthtm = BeautifulSoup(html, 'lxml')link = htm.select
2020-11-29 09:43:40 127
原创 scrapy案例 爬取数据保存到excel
# -*- coding: utf-8 -*-import scrapyclass Mkw1Item(scrapy.Item): # define the fields for your item here like: img = scrapy.Field() title = scrapy.Field() type = scrapy.Field() pic = scrapy.Field()# -*- coding: utf-8 -*-import s
2020-11-28 11:19:46 829
原创 通过读取excel中的地址搜索经纬度并提交(selenium)
wb = openpyxl.load_workbook('门店位置信息.xlsx')ws = wb.active # 当前活跃的表单rows = ws.rowscolumns = ws.columnsurl = "https://jingweidu.51240.com/"driver = webdriver.Chrome(r"D:\chromedriver.exe")driver.get(url)va = []def cha(v, num_v): if '路' in v:
2020-11-14 10:31:05 645
原创 scrapy 爬取数据保存到数据库
items.py# -*- coding: utf-8 -*-# Define here the models for your scraped items## See documentation in:# https://doc.scrapy.org/en/latest/topics/items.htmlimport scrapyclass MkwItem(scrapy.Item): link = scrapy.Field() # 图片 type = scrapy.
2020-11-14 10:22:15 482
原创 python 自动点击输入登录
from selenium import webdriverurl = "https://www.qidian.com/"headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',}driver = webdriver.Chrome( r"D:\Anaco
2020-05-11 02:31:22 610
原创 python urllib抓取豆瓣影评
from urllib import requestfrom lxml import etreeheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'}urls = ['https://movie.douban.com/review/best/?start={}'
2020-05-09 04:11:39 309
原创 python 多进程抓取数据存入txt
import requestsfrom lxml import etreefrom bs4 import BeautifulSoupimport timefrom selenium import webdriverimport sysfrom multiprocessing import Process, Locksys.setrecursionlimit(8000)heade...
2020-05-05 15:45:11 414
原创 python xpath匹配 + selenium模拟点击 保存到mysql
import requestsfrom lxml import etreeimport timefrom selenium import webdriverimport pymysqlimport syssys.setrecursionlimit(8000)header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Wi...
2020-05-04 16:07:45 307
原创 python xpath匹配 + selenium模拟点击 保存到txt
import requestsfrom lxml import etreeimport timefrom selenium import webdriverimport syssys.setrecursionlimit(8000)header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleW...
2020-05-04 13:46:56 746
原创 python requests爬取图片保存本地
import requestsimport lxmlfrom bs4 import BeautifulSoupimport timeimport osheader = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,applica...
2020-05-03 02:06:21 363
原创 微擎上传图片文件
load()->func('file');$filename = 'images/uplodes/' . md5(time().rand(0000,9999)) . '.' . explode('.', $_FILES['image']['name'])[1];file_move($_FILES['image']['tmp_name'], MODULE_ROOT . '/' . ...
2020-04-22 16:57:08 1112
空空如也
空空如也
TA创建的收藏夹 TA关注的收藏夹
TA关注的人