This first version does not use multithreading. Run it and you can see where it stalls and why it hangs for so long: the pages are requested one after another, so every slow response blocks everything behind it.
def dmax():
    import requests, re
    from bs4 import BeautifulSoup

    url = 'http://www.3dmax8.com/3dmax/peixun/3dmax2020/'
    # url = 'http://www.3dmax8.com/3dmax/peixun/3dmax2019/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71'}
    # headers must be passed by keyword: the second positional argument
    # of requests.get() is params, not headers
    response = requests.get(url, headers=headers)
    response.encoding = "gbk"
    soup = BeautifulSoup(response.text, "html.parser")
    first_title = soup.find_all("li")

    zh_char = '[\u4e00-\u9fa5]+'  # Chinese characters
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')  # URL pattern

    kk = []  # links collected from the index page
    for item in first_title:
        k = re.findall(zh_char, str(item))      # the Chinese title text
        f = re.findall(url_pattern, str(item))  # any links inside the <li>
        try:
            z = ''.join(k) + ':' + f[0]         # raises IndexError when the <li> has no link
            kk.append(f[0])
            print(z)
        except IndexError:
            pass                                # skip <li> entries without a link

    def pa(url):
        # Crawl one sub-page and print "title:link" for every <li> on it.
        response = requests.get(str(url), headers=headers)
        response.encoding = "gbk"
        soup = BeautifulSoup(response.text, "html.parser")
        for item in soup.find_all("li"):
            k = re.findall(zh_char, str(item))
            f = re.findall(url_pattern, str(item))
            try:
                print(''.join(k) + ':' + f[0])
            except IndexError:
                pass

    # Sequential crawl: each call to pa() blocks until its request finishes,
    # so the total runtime is the sum of every page's download time.
    for link in kk:
        pa(link)

dmax()
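To confirm where the sequential version spends its time, it helps to time each request and give requests.get a timeout so one dead link cannot hang the whole run. A minimal sketch, reusing the same headers dict as above; the timed_get name and the 10-second timeout are illustrative choices, not part of the original code:

    import time
    import requests

    def timed_get(url, headers=None, timeout=10):
        # Fetch one URL and report how long the request took (illustrative helper).
        start = time.perf_counter()
        response = requests.get(url, headers=headers, timeout=timeout)
        print(f'{url} -> {response.status_code} in {time.perf_counter() - start:.2f}s')
        return response

Swapping this in for the plain requests.get calls makes it obvious that almost all of the runtime is network wait, one page after another.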
The version below uses multithreading for the same crawl. Notice how much faster it runs: the sub-pages are fetched in parallel instead of one after another.
def dmax():
    import requests, re
    from bs4 import BeautifulSoup
    from threading import Thread

    url = 'http://www.3dmax8.com/3dmax/peixun/3dmax2020/'
    # url = 'http://www.3dmax8.com/3dmax/peixun/3dmax2019/'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71'}
    response = requests.get(url, headers=headers)  # headers passed by keyword, as above
    response.encoding = "gbk"
    soup = BeautifulSoup(response.text, "html.parser")
    first_title = soup.find_all("li")

    zh_char = '[\u4e00-\u9fa5]+'  # Chinese characters
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')  # URL pattern

    kk = []  # links collected from the index page
    for item in first_title:
        k = re.findall(zh_char, str(item))
        f = re.findall(url_pattern, str(item))
        try:
            z = ''.join(k) + ':' + f[0]
            kk.append(f[0])
            print(z)
        except IndexError:
            pass  # skip <li> entries without a link

    def pa(url):
        # Crawl one sub-page and print "title:link" for every <li> on it.
        response = requests.get(str(url), headers=headers)
        response.encoding = "gbk"
        soup = BeautifulSoup(response.text, "html.parser")
        for item in soup.find_all("li"):
            k = re.findall(zh_char, str(item))
            f = re.findall(url_pattern, str(item))
            try:
                print(''.join(k) + ':' + f[0])
            except IndexError:
                pass

    # Threaded crawl: one thread per link, so the sub-pages download in parallel.
    # Note that dmax() returns as soon as the threads are started; the
    # non-daemon threads keep running until their pages finish.
    for link in kk:
        t = Thread(target=pa, args=(link,))
        t.start()

dmax()
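One thing the threaded version skips: it never joins its threads, so dmax() returns while the sub-pages are still downloading, and starting one thread per link puts no bound on how many requests run at once. A thread pool covers both points. The sketch below is not the author's code; it uses the standard-library concurrent.futures module, assumes a pa(url) function and a link list kk like the ones defined above, and the pool size of 8 is an arbitrary illustrative choice:

    from concurrent.futures import ThreadPoolExecutor

    def crawl_all(links, worker, max_workers=8):
        # Fetch every link on a bounded pool of threads and wait for all of them.
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            # pool.map() runs worker(link) for each link; leaving the
            # with-block waits until every submitted task has finished.
            list(pool.map(worker, links))

    # Usage with the code above: crawl_all(kk, pa)

This keeps the parallel speed-up while making the crawl finish deterministically before the function returns.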