import re
import requests
import pymysql
from bs4 import BeautifulSoup
def spideData(page):
    """Scrape one results page of poems from gushiwen.cn and insert them into MySQL.

    Args:
        page: 1-based page number of the search-results listing to fetch.

    Side effects:
        Creates the `tangshi` table if missing and inserts one row per poem
        (title, author, content) found on the page, then commits.

    Raises:
        requests.HTTPError: if the page fetch returns an error status.
        pymysql.MySQLError: on database connection/query failure.
    """
    # NOTE(review): credentials are hard-coded; move to config/env in production.
    conn = pymysql.connect(
        host='118.190.8.4',
        user='shici',
        password='shici',
        database='shici',
        port=3306,
        charset='utf8mb4',  # explicit charset so Chinese text round-trips correctly
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "create table if not exists tangshi(id int(11) NOT NULL AUTO_INCREMENT,title varchar(100),author varchar(50),content text,PRIMARY KEY (`id`))")
            url = "https://so.gushiwen.cn/shiwens/default.aspx?page=" + str(page) + "&tstr=&astr=&cstr=&xstr=诗"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
            }
            # Timeout prevents hanging forever; raise_for_status surfaces HTTP errors
            # instead of silently parsing an error page.
            response = requests.get(url=url, headers=headers, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')
            titleList = soup.select('div.sons p b')
            authorList = soup.select('.sons p.source')
            contentList = soup.select('.sons .contson')
            # zip pairs the three parallel selections safely (truncates to the
            # shortest list instead of raising IndexError on a malformed page).
            rows = [
                (title.text.strip(), author.text.strip(), content.text.strip())
                for title, author, content in zip(titleList, authorList, contentList)
            ]
            if rows:
                query = "insert into tangshi (title,author,content) values (%s,%s,%s)"
                cursor.executemany(query, rows)  # one round-trip instead of N
        conn.commit()
    finally:
        # Always release the connection, even if the fetch or insert fails.
        conn.close()
if __name__=="__main__":
for i in range(2,10):
spideData(i)
# Using Python to scrape data and write it into a MySQL database.
# (Originally published; latest recommended article dated 2024-07-12 16:16:27.)