import os
import random
import re
import time
import urllib.request
from urllib.error import URLError, HTTPError

from openpyxl import load_workbook
from openpyxl import Workbook
def mk_dir():
# Create the folders
if not os.path.exists(newspath):
os.mkdir(newspath)
if not os.path.exists(newspath+'config\\'):
os.mkdir(newspath+'config\\')
if not os.path.exists(newspath + 'config\\adress_dict.xlsx'):
# Preset news-site configuration
header = ['网站名称','网站地址','新闻地址规则','新闻头',
'新闻尾','标题头','标题尾','正文头','段落头',
'段落尾']
url_dict = {'zaobao':'https://www.zaobao.com/realtime/china',
'sina':'http://hunan.sina.com.cn',
'fenghuang':'http://news.ifeng.com/',
'baidu':'http://news.baidu.com/guonei',
'163':'http://news.163.com/',
'pengbai':'http://www.thepaper.cn/channel_25950'}
news_addr = {'zaobao':'realtime/china/story'+time.strftime('%Y%m%d',time.localtime())+u'-\d+',
'sina':'http://hunan.sina.com.cn/news/'+u'./'+time.strftime('%Y-%m-%d',time.localtime())+u'/detail-\D+\d{7}\.shtml',
'fenghuang':'http://news.ifeng.com'+u'/./'+time.strftime('%Y%m%d',time.localtime())+'/\d{8}_\d\.shtml',
'baidu':'http://www.thepaper.cn/newsDetail_forward_'+u'\d{7}',
'163':'http://news.163.com/'+time.strftime('%y/%m%d',time.localtime())+u'/\d\d/\.+\.html',
'pengbai':'newsDetail_forward_'+u'\d+'}
news_dict = {'zaobao_begin':'<div class="body-content">','zaobao_end':'<div id="dfp-ad-midarticlespecial-wrapper" class="dfp-tag-wrapper">',
'sina_begin':'<div class="article-header clearfix">','sina_end':'<strong class=\'article_erweima_title\'>',
'fenghuang_begin':'<div class="yc_ma