from bs4 import BeautifulSoup as bsp
import requests
import re
import csv
# Scrape index pages 549 down to 539 (inclusive) of the SDU campus-news
# section and collect every headline: each page holds the list items
# (<li>) under elements with class "sublist".
titles3 = []
for i in range(549, 538, -1):  # same pages as the original while-loop counter
    url = "http://www.view.sdu.edu.cn/xyxw/" + str(i) + ".htm"
    page_response = requests.get(url, timeout=5)
    page_content = bsp(page_response.content, "html.parser")
    for sublist in page_content.find_all(class_="sublist"):
        # One headline per <li>; keep the raw text (title + date) for now.
        titles3 += [item.text for item in sublist.find_all("li")]
# Persist the collected headlines, one per line.
# NOTE: join the list directly — stringifying the whole list and replacing
# commas with newlines would leak "[", "]" and quote characters into the
# file and would also split any title that itself contains a comma.
titles5 = "\n".join(titles3)
temp = []    # date-stripped headline lines, filled by the next pass
time_t = []  # extracted "YYYY-M-D" date strings, parallel to temp
with open("viewsdu5.txt", mode="w", encoding="utf-8") as f:
    f.write(titles5)
# no explicit f.close(): the with-statement already closes the file
# Second pass: pull the publication date ("YYYY-M-D") out of every saved
# line, accumulating dates in time_t and the date-stripped text in temp.
date_pattern = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")  # hoisted out of the loop
with open("viewsdu5.txt", mode="r", encoding="utf-8") as f:
    for line in f:  # iterate lazily; no need to materialize readlines()
        match = date_pattern.search(line)
        if match is None:
            # Line carries no date (e.g. blank line) — skip it instead of
            # crashing on None.group(0).
            continue
        date_str = match.group(0)
        time_t.append(date_str)
        # Blank out the date so only the headline text remains.
        temp.append(line.replace(date_str, " "))
# Overwrite the text file with the date-stripped headline lines.
with open("viewsdu5.txt", mode="w", encoding="utf-8") as f:
    f.writelines(temp)  # lines already end with "\n"; batch write, no loop
# no explicit f.close(): the with-statement already closes the file
# Finally, export (headline, date) pairs as a two-column CSV.
headers = ["内容", "时间"]  # column names: "content", "date"
rows = [[tem, tim] for tem, tim in zip(temp, time_t)]
# encoding="utf-8" is required: the platform default codec may not be able
# to encode the Chinese header strings; newline="" is the csv-module
# convention to avoid blank interleaved rows on Windows.
with open("csv01.csv", "w", newline="", encoding="utf-8") as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(rows)