新人练手篇,主要是为了巩固自己学过的爬取内容,并让脚本正式运行起来,今晚还加入了对文件操作的学习。喜欢的朋友可以一起学习一下;当然,如果有高手能去分析、推算下期结果就更好了,请这样的大神多多分享。梦想还是要有的,万一实现了呢。大神继续,我洗洗睡了。
"""
Title = Double Color Ball (双色球, SSQ) lottery draw result scraper
Coder = 柏林
Date = 2018-01-25

Reads the number of result pages from the index page of kaijiang.zhcw.com,
then downloads every result page and, for each page:

* writes the drawn numbers to ``num.txt`` (comma separated, seven per line),
* writes the draw dates to ``date.txt`` (one ``YYYY-MM-DD`` per line),
* appends the zipped ``(date, numbers)`` tuples to ``hun.txt``, e.g.::

    ('2018-01-25', '03,10,21,23,27,33,11')
    ('2018-01-23', '01,08,17,20,21,22,03')
    ('2018-01-21', '05,10,17,23,26,32,07')

``num.txt`` and ``date.txt`` are overwritten for every page, so after a run
they hold the last page only.  ``hun.txt`` is opened in append mode, so
running the script again adds the same rows a second time.
"""
import re
import urllib
import urllib.request
from typing import Iterable, List

import requests
from bs4 import BeautifulSoup

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36 '
                  'Maxthon/5.1.3.2000'
}
INDEX_URL = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list.html'
PAGE_URL_PREFIX = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list_'
PAGE_URL_SUFFIX = '.html'

# Pager markup on the index page; the captured group is the total page count.
PAGE_COUNT_RE = re.compile('class="pg"> 共<strong>(.*?)</strong> 页')
# Raw string: '\d' in a plain literal is an invalid escape sequence.
DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')

# Every seventh <em> value ends a line in num.txt.
BALLS_PER_DRAW = 7
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def _fetch(url: str) -> str:
    """Return the decoded body of *url*.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def _page_count(index_html: str) -> int:
    """Extract the total number of result pages from the index page.

    Raises:
        RuntimeError: if the pager markup is not found in *index_html*.
        ValueError: if the captured page count is not an integer.
    """
    found = PAGE_COUNT_RE.search(index_html)
    if found is None:
        raise RuntimeError('page count not found in ' + INDEX_URL)
    return int(found.group(1))


def _format_numbers(numbers: Iterable[str]) -> str:
    """Join *numbers* with commas, ending a line after every seventh value.

    A trailing group of fewer than seven values keeps its trailing comma and
    gets no newline.
    """
    pieces = []
    for position, number in enumerate(numbers, start=1):
        terminator = '\n' if position % BALLS_PER_DRAW == 0 else ','
        pieces.append(number + terminator)
    return ''.join(pieces)


def _extract_dates(cell_texts: Iterable[str]) -> List[str]:
    """Return the first ``YYYY-MM-DD`` date of every text that contains one."""
    dates = []
    for text in cell_texts:
        found = DATE_RE.search(text)
        if found is not None:
            dates.append(found.group(0))
    return dates


def _scrape_page(page_number: int) -> None:
    """Download one result page and update num.txt, date.txt and hun.txt."""
    page_url = PAGE_URL_PREFIX + str(page_number) + PAGE_URL_SUFFIX
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(_fetch(page_url), 'html.parser')

    number_text = _format_numbers(
        em.get_text() for em in soup.find_all('em')
    )
    dates = _extract_dates(
        td.get_text() for td in soup.find_all('td', {'align': 'center'})
    )

    # Write the drawn numbers of this page to num.txt.
    with open("num.txt", "w", encoding="utf-8") as numbers_file:
        numbers_file.write(number_text)

    # Write the draw dates of this page to date.txt.
    with open("date.txt", "w", encoding="utf-8") as dates_file:
        dates_file.writelines(date + '\n' for date in dates)

    # Merge dates and numbers into hun.txt.  The in-memory values are the
    # same lines the two files above now contain, so they are not re-read.
    with open("hun.txt", "a", encoding="utf-8") as merged_file:
        for pair in zip(dates, number_text.splitlines()):  # pair up by row
            print(pair)
            merged_file.write(str(pair) + '\n')


def main() -> None:
    """Scrape every result page listed by the index page."""
    total_pages = _page_count(_fetch(INDEX_URL))
    for page_number in range(1, total_pages + 1):
        _scrape_page(page_number)


if __name__ == "__main__":
    main()