# Import dependencies
import requests,re,chardet,pymysql
from piaot import *
# Find out which categories are listed on the home page
def shoye():
    """Fetch the listing index page and return its leading navigation links.

    Downloads the hard-coded index page, decodes it as gb2312, and extracts
    every ``<a href="...">text</a>`` that is immediately followed by
    ``</li><li>``. Only the first 11 matches are kept; the section heading
    above this function describes them as the home-page categories.

    Returns:
        list[tuple[str, str]]: ``(absolute_url, link_text)`` pairs in page
        order. Relative hrefs are resolved against ``http://www.ygdy8.net``;
        hrefs that are already absolute are kept as they are.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # Local import so this block does not depend on the file-level imports.
    from urllib.parse import urljoin

    url = 'http://www.dytt8.net/html/gndy/dyzz/index.html'
    base_url = 'http://www.ygdy8.net'
    max_links = 11  # only the leading matches are wanted

    headers = {
        # pa() comes from the `piaot` star-import; presumably it returns a
        # User-Agent string -- confirm against that module.
        "User-Agent": pa()
    }

    # Without a timeout a stalled server would block the crawler forever.
    req = requests.get(url, headers=headers, timeout=10)
    req.encoding = 'gb2312'

    # Capture (href, text) of anchors directly followed by "</li><li>".
    link_pattern = re.compile(r'<a href="(.*?)">(.*?)</a></li><li>')
    links = link_pattern.findall(req.text)[:max_links]

    # urljoin leaves absolute URLs alone and resolves relative ones, so a
    # link to another host is no longer glued onto base_url.
    return [(urljoin(base_url, href), title) for href, title in links]
# Crawl a sub-category
def zl(url):
html=''
try:
req=yc(url)