# Level 1: Crawler basics (第一关:爬虫基础)
import requests
from bs4 import BeautifulSoup
import re
def w1(cookie="----------", timeout=10):
    """Fetch the GlidedSky "crawler-basic-1" page and print the sum of its numbers.

    The page is requested with browser-like headers, every
    ``<div class="col-md-1">`` element is located, the first integer found in
    each element's text is extracted, and all of those integers are added up.

    Args:
        cookie: Value sent in the ``Cookie`` request header. The default is a
            placeholder and must be replaced with your own session cookie.
        timeout: Seconds to wait for the server before giving up, so the
            script cannot hang forever on a stalled connection.

    Returns:
        The computed sum (it is also printed to stdout).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    url = "http://www.glidedsky.com/level/web/crawler-basic-1"
    headers = {
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Referer": "http://www.glidedsky.com/level/crawler-basic-1",
        # Replace the cookie with your own for the request to be accepted.
        "Cookie": cookie,
    }

    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently reporting a sum of 0.
    response.raise_for_status()

    # Dump the raw HTML so it is easy to see what the server actually sent.
    print(response.text)

    soup = BeautifulSoup(response.text, 'html.parser')
    # Accept integers of any length; restricting the match to exactly three
    # digits would silently skip shorter or longer numbers.
    number_pattern = re.compile(r'\d+')

    # Find every div tag carrying the grid-cell class.
    div_tags = soup.find_all('div', class_='col-md-1')

    # Extract the first number from each cell and accumulate the total,
    # searching each cell's text only once.
    total_sum = 0
    for tag in div_tags:
        match = number_pattern.search(tag.text)
        if match:
            total_sum += int(match.group())

    print(f"总和:{total_sum}")
    return total_sum
# Run the level-1 crawler only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    w1()