import logging
import re
import time
from urllib.request import urlopen

import numpy
import pymysql
from bs4 import BeautifulSoup
def getHtmlContent(url):
    """Scrape the table rows at *url* and store them in MySQL.

    Downloads the page, collects every ``<tr>`` element, records the current
    local timestamp in the ``date`` table, and then inserts every row after
    the first (presumably the header row -- confirm against the page) into
    the ``aqi`` table.  Each row is extended with the scrape timestamp and a
    trailing ``"0"`` before insertion.

    Every insert is committed on its own, so one failing row does not undo
    the rows already stored.  A failed insert is rolled back, logged as a
    warning, and skipped.

    Args:
        url: Address of the page to scrape; the document is parsed with
            ``from_encoding="gb18030"``.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        pymysql.MySQLError: If the database connection cannot be opened.
    """
    log = logging.getLogger(__name__)

    # BeautifulSoup reads the whole response in its constructor, so the HTTP
    # connection can be released as soon as parsing is done.
    with urlopen(url) as response:
        bsobj = BeautifulSoup(response, "lxml", from_encoding="gb18030")
    table_rows = bsobj.find_all("tr")

    insert_date_sql = "insert into date values (%s)"
    insert_row_sql = "insert into aqi values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    # A row can only be inserted if it supplies one value per placeholder.
    expected_columns = insert_row_sql.count("%s")

    # Current local time, shared by the date record and every data row.
    date = time.strftime("%Y-%m-%d %H:%M:%S")

    # User: root, password: 123456, database: aqi-changsha.
    # NOTE(review): credentials are hard-coded; consider loading them from
    # configuration or the environment.
    # Keyword arguments are required: PyMySQL 1.0+ no longer accepts the
    # connection parameters positionally.
    db = pymysql.connect(
        host="localhost",
        user="root",
        password="123456",
        database="aqi-changsha",
        charset="utf8",
    )
    try:
        with db.cursor() as cursor:
            try:
                cursor.execute(insert_date_sql, (date,))
                db.commit()
            except pymysql.MySQLError:
                db.rollback()
                log.warning(
                    "could not record scrape time %s", date, exc_info=True
                )

            for row in table_rows[1:]:
                data = (
                    row.get_text()
                    .replace(" ", "")
                    .replace("\r\n", "")
                    .strip("\n")
                    .split("\n")
                )
                data.append(date)
                data.append("0")

                if len(data) != expected_columns:
                    # Previously such rows vanished inside a bare ``except``;
                    # skip them explicitly and say so.
                    log.warning(
                        "skipping row with %d values (expected %d): %r",
                        len(data),
                        expected_columns,
                        data,
                    )
                    continue

                try:
                    cursor.execute(insert_row_sql, data)
                    db.commit()
                except pymysql.MySQLError:
                    db.rollback()
                    log.warning(
                        "could not insert row %r", data, exc_info=True
                    )
    finally:
        # Always release the connection, even if scraping fails part-way.
        db.close()
def main():
    """Scrape the Changsha AQI page and store its rows in the database."""
    getHtmlContent("http://www.tianqihoubao.com/aqi/changsha.html")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Python 爬虫获取数据后存入 MySQL 数据库中
# 最新推荐文章于 2024-08-12 15:08:41 发布