# 1. 使用普通方式爬取 (Part 1: crawling the ordinary way)
#!/usr/bin/python3
#coding:utf8
from bs4 import BeautifulSoup
import requests
import time
from concurrent.futures import ProcessPoolExecutor
from threading import Thread
# Listing-page URL prefix; it ends in "page=", so presumably a page number
# is appended by the caller before fetching (confirm against the call site).
url = 'http://www.kan12345.com/class.asp?id=27&page='

# Output file handle shared at module level. Mode 'w' truncates any existing
# 'ut' file. An alternative left disabled by the original author was to open
# the file with 'r+' and call read() first, which moves the file position to
# the end so that later writes follow the existing content.
w = open(file='ut', mode='w')
def get(url):
req=requests.Session()
html=req.get(url)
#html=html.content.decode("gb2312")
soup=BeautifulSoup(html.text,'html.parser')
ss=soup.find_all('div',class_='box')
soup=BeautifulSoup(str(ss),'html.parser')
ss=soup.find_all('h4')
soup=BeautifulSoup(str(ss),'html.parser')
ss=soup.find_all('a')
print(url)