爬虫
文章平均质量分 77
jolingcome
在CSDN中学习成长
展开
-
BeautifulSoup基础应用
#导入Beautifulsoup包
from bs4 import BeautifulSoup as bs
html_doc = """The Dormouse's story The Dormouse's story Once upon a time there were three little sisters; and their names were Elsie, Lacie an…
原创 2017-03-21 09:02:44 · 251 阅读 · 0 评论 -
爬取wikipedia词条
#导入Beautifulsoup包
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen
import re
# 请求URL并把结果用utf-8编码
resp=urlopen("https://en.wikipedia.org/wiki/Main_page").read().decode("utf-8")
#…
原创 2017-03-21 09:30:17 · 1140 阅读 · 0 评论 -
写入mysql数据库
#导入Beautifulsoup包
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen
import re
# 请求URL并把结果用utf-8编码
resp=urlopen("https://en.wikipedia.org/wiki/Main_page").read().decode("utf-8")
…
原创 2017-03-23 09:21:22 · 516 阅读 · 0 评论 -
mysql数据库查询
#coding=utf-8
__author__ = 'mac'
#导入开发包
import pymysql
#获取数据库连接
connection=pymysql.connect(host='127.0.0.1', port=3306, user='root',…
原创 2017-03-24 09:01:55 · 241 阅读 · 0 评论 -
多线程爬斗图网
#多线程爬斗图网站
import requests,threading
from lxml import etree
from bs4 import BeautifulSoup
#1.获取网页,获取源码
def get_html(url):
    # url='https://www.doutula.com/article/list/?page=1'
    headers={'User-Ag…
原创 2017-04-14 10:16:41 · 594 阅读 · 0 评论