python
Evol_ve
这个作者很懒,什么都没留下…
展开
-
python-numpy模块学习-常用语法(一)
import numpy as npimport matplotlib.pyplot as pltimg_arr = plt.imread('./1.jpeg')# 显示图片plt.imshow(img_arr)# 图片调整颜色-100arr = arr - 100plt.imshow(arr)# 显示图片角度 像素 像素 颜色 (700, 700, 3)arr.shape# 数组角度反转 [行,列,颜色]plt.imshow(arr[::-1,::-1,:])# 图片裁原创 2021-07-18 15:58:50 · 220 阅读 · 0 评论 -
python爬虫入门学习12-scrapy-东莞阳光热线问政-2021-07
# 爬虫类from scrapy.linkextractors import LinkExtractorfrom scrapy.spiders import CrawlSpider, Rulefrom sunPro.items import SunproItem, SunDetailItemfrom selenium import webdriverfrom selenium.webdriver import ActionChainsfrom selenium.webdriver.chrome.原创 2021-07-04 22:42:10 · 631 阅读 · 0 评论 -
python爬虫入门学习11-scrapy-网易新闻-2021-07
import scrapyfrom selenium import webdriverfrom new163Pro.items import New163ProItemimport timeclass New163Spider(scrapy.Spider): name = 'new163' # allowed_domains = ['news.163.com'] start_urls = ['https://news.163.com/'] models_url_lis原创 2021-07-04 22:32:57 · 194 阅读 · 0 评论 -
python爬虫入门学习10-scrapy-站长之家图片爬取
class ImgSpider(scrapy.Spider): name = 'img' start_urls = ['https://sc.chinaz.com/tupian/'] # 爬虫处理方法 def parse(self, response): div_list = response.xpath('//*[@id="container"]/div') for div in div_list: # 图片懒加载原创 2021-07-04 22:26:21 · 195 阅读 · 0 评论 -
python爬虫入门学习9-selenium+超级鹰12306模拟登录过验证码
from util import chaojiying_util as cuimport timefrom selenium import webdriverfrom selenium.webdriver import ActionChainsfrom selenium.webdriver.chrome.options import Optionsfrom selenium.webdriver import ChromeOptionsfrom PIL import Imagedef calx原创 2021-07-02 22:38:53 · 148 阅读 · 0 评论 -
python爬虫入门学习8-xpath-pearvideo-视频批量下载-线程池
import requestsfrom util import headers_utils as hd, download_util as dufrom lxml import htmlimport randomimport jsonfrom multiprocessing.dummy import Pool# 下载方法 配合线程池def batch_download(dict): file_path = dict['file_path'] file_name = dict[原创 2021-06-27 20:27:19 · 484 阅读 · 0 评论 -
python爬虫入门学习7-xpath-chinaz-简历爬取下载-retry
import requestsfrom util import headers_utils as hd, download_util as dlfrom lxml import htmlimport osif __name__ == '__main__': page_num = 744 # 总页数 懒得也逻辑查了 这里自己下载前到网站上确认下 dir_path = './resume' # 文件夹不存在则自动创建 if not os.path.exists(dir_原创 2021-06-26 22:18:29 · 224 阅读 · 2 评论 -
python爬虫入门学习6-xpath-netbian-图片爬取
import requestsfrom lxml import htmlimport osif __name__ == '__main__': headers = { 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36', } url = .原创 2021-06-26 18:12:16 · 124 阅读 · 0 评论 -
python爬虫入门学习5-xpath-52ershoufang
from lxml import htmlimport requestsif __name__ == '__main__': headers = { 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36', } # 获取网页 url = 'h原创 2021-06-26 17:19:05 · 130 阅读 · 0 评论 -
python爬虫入门学习4-诗词名句网-BeautifulSoup
from bs4 import BeautifulSoupimport requestsif __name__ == '__main__': headers = { 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36', } url = 'http原创 2021-06-26 17:14:43 · 153 阅读 · 0 评论 -
python3 操作es简单学习
ES和Mysql对应关系关# 连接eses = Elasticsearch(["127.0.0.1"], http_auth=('es_service', ''), port=9200)# 插入数据,index,doc_type名称可以自定义,id可以根据需求赋值,body为内容 如果不指定 id,会自动生成一个 iddef index(database, table, id, doc): # return es.index(index=database, doc_type=table,原创 2021-06-26 17:09:27 · 1013 阅读 · 0 评论 -
python3 flask上传excel xls文件到后台并解析
前端代码<!DOCTYPE html><html lang="en"><head> <meta charset="UTF-8"> <title>Title</title></head><body><div> <form method="post" enctype="multipart/form-data" action="http://127.0.0.1:50原创 2021-04-25 11:04:44 · 2665 阅读 · 3 评论 -
python爬虫入门学习3 批量爬取小说并生成文件
#!/usr/bin/python# -*- coding: UTF-8 -*-"""@author:Evolve Hsu@file:thread_book.py@time:2021/03/26"""import reimport urllibimport threadingfrom urllib import request, error # 制定URL 获取网页数据from bs4 import BeautifulSoup # 网页解析 获取数据import sqlite3原创 2021-03-28 17:53:07 · 1121 阅读 · 3 评论 -
python爬虫入门学习2 -使用urllib,BeautifulSoup及sqlite3 练习小说爬取某新笔X阁
闲来没事,刚学了爬虫,尝试用某笔X阁练手,初学者水平。 #!/usr/bin/python# -*- coding: UTF-8 -*-"""@author:Evolve Hsu@file:thread_book.py@time:2021/03/26"""import urllibimport threadingfrom urllib import request, error # 制定URL 获取网页数据from bs4 import BeautifulSoup # 网页解析 获取数原创 2021-03-27 18:00:26 · 277 阅读 · 0 评论