"""Scrape historical shuangseqiu (double-color-ball) lottery draws from
datachart.500.com and insert them into the local MySQL table `caipiao`.

Each draw row carries: issue number (qi), six red balls (hong1..hong6),
one blue ball (lan), and the draw date (riqi).
"""
import re
from datetime import datetime

# Full history range, as in the original script (start/end are issue numbers).
URL = "http://datachart.500.com/ssq/history/newinc/history.php?start=1001&end=19019"

# Hoisted row-parsing patterns (compiled once, not per <tr>).
_ISSUE_RE = re.compile(r'<tr class="t_tr1"><!--<td>2</td>--><td>(.*?)</td>')
_RED_RE = re.compile(r'<td class="t_cfont2">(.*?)</td>')   # six red balls
_BLUE_RE = re.compile(r'<td class="t_cfont4">(.*?)</td>')  # one blue ball
_DATE_RE = re.compile(r'</td><td>([^<]*?)</td></tr>')      # last plain <td> = draw date


def parse_row(row_html):
    """Parse one <tr> of the history table.

    Returns (issue, [red1..red6], blue, draw_date: datetime) or None for
    filler rows (e.g. <tr class="tdbck">) that carry no draw data — the
    original code crashed with IndexError on those.
    """
    issues = _ISSUE_RE.findall(row_html)
    reds = _RED_RE.findall(row_html)
    blues = _BLUE_RE.findall(row_html)
    dates = _DATE_RE.findall(row_html)
    if not (issues and len(reds) >= 6 and blues and dates):
        return None
    return issues[0], reds[:6], blues[0], datetime.strptime(dates[0], "%Y-%m-%d")


def main():
    """Fetch the history page, parse every draw row, and insert into MySQL."""
    # Third-party imports are local so parse_row() stays importable without them.
    import pymysql
    import requests
    from bs4 import BeautifulSoup

    # timeout is in SECONDS; the original passed 500 (likely meant as ms).
    html = requests.get(URL, timeout=30).text
    table = BeautifulSoup(html, "html.parser").find(id="tdata")

    conn = pymysql.connect(host="localhost", user="root", password="123456",
                           database="aaa", charset="utf8")
    try:
        with conn.cursor() as cursor:
            # Parameterized query: pymysql quotes/escapes every value itself.
            # The original built SQL with %-string formatting (injection-prone
            # and broken for values containing quotes).
            sql = ("INSERT INTO caipiao"
                   "(qi,hong1,hong2,hong3,hong4,hong5,hong6,lan,riqi) "
                   "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
            for tr in table.find_all("tr"):
                parsed = parse_row(str(tr))
                if parsed is None:
                    continue  # spacer row, nothing to insert
                issue, reds, blue, draw_date = parsed
                cursor.execute(sql, (issue, *reds, blue, draw_date))
        # One commit for the whole batch instead of one per row.
        conn.commit()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
python爬虫获取双色球历史中奖记录&写入数据库
最新推荐文章于 2024-08-14 18:25:08 发布