def getNewsList(Newsurl):
global ARTICLES
global TITLE
global TIME
global COUNT
global IMG
res=requests.get(Newsurl)
soup=BeautifulSoup(res.text,'html.parser')
TITLE=soup.select('.listList ul li a')
TIME=soup.select('.time')
# IMG=soup.select('')
# print(TIME)
# print(TIME)
for each in TITLE:
COUNT=COUNT+1
if 'ggyw' in Newsurl:
creatTable__ggyw()
elif 'xydt'in Newsurl:
creatTable__xydt()
def creatTable__ggyw():
class Title_ggyw(db.Model):
__tablename__='title_ggyw'
id=db.Column(db.Integer,primary_key=True)
name=db.Column(db.String(1024))
time=db.Column(db.String(265))
def __ref__(self):
return '<Title_ggyw{}>'.format(self.name,self.time)
db.create_all()
for i in range(0,COUNT):
title_ggyw=Title_ggyw(name=TITLE[i].text,time=TIME[i].text)
db.session.add_all([title_ggyw])
db.session.commit()
def creatTable__xydt():
class Title_xydt(db.Model):
__tablename__='title_xydt'
id=db.Column(db.Integer,primary_key=True)
name=db.Column(db.String(1024))
time=db.Column(db.String(265))
def __ref__(self):
return '<Title_xydt{}>'.format(self.name,self.time)
db.create_all()
for i in range(0,5):
title_xydt=Title_xydt(name=TITLE[i].text,time=TIME[i].text)
db.session.add_all([title_xydt])
db.session.commit()
getNewsList('http://www.dgut.edu.cn/dgut/ggyw/news_list.shtml')
getNewsList('http://www.dgut.edu.cn/dgut/xydt/news_list.shtml')
代码如上。
运行的时候报错,说是index溢出。
第一感觉就是去查看一下for循环里的count。发现问题了,创建完第一个表时,count没有归零。
于是加了一行count =0;
改后代码如下:
def getNewsList(Newsurl): # artcles='' # count=0 global ARTICLES global TITLE global TIME global COUNT global IMG res=requests.get(Newsurl) soup=BeautifulSoup(res.text,'html.parser') TITLE=soup.select('.listList ul li a') TIME=soup.select('.time') # IMG=soup.select('') # print(TIME) # print(TIME) for each in TITLE: COUNT=COUNT+1 if 'ggyw' in Newsurl: creatTable__ggyw() COUNT=0 elif 'xydt'in Newsurl: creatTable__xydt()
COUNT=0
def creatTable__ggyw(): class Title_ggyw(db.Model): __tablename__='title_ggyw' id=db.Column(db.Integer,primary_key=True) name=db.Column(db.String(1024)) time=db.Column(db.String(265)) def __ref__(self): return '<Title_ggyw{}>'.format(self.name,self.time) db.create_all() for i in range(0,COUNT): title_ggyw=Title_ggyw(name=TITLE[i].text,time=TIME[i].text) db.session.add_all([title_ggyw]) db.session.commit()def creatTable__xydt(): class Title_xydt(db.Model): __tablename__='title_xydt' id=db.Column(db.Integer,primary_key=True) name=db.Column(db.String(1024)) time=db.Column(db.String(265)) def __ref__(self): return '<Title_xydt{}>'.format(self.name,self.time) db.create_all() for i in range(0,5): title_xydt=Title_xydt(name=TITLE[i].text,time=TIME[i].text) db.session.add_all([title_xydt]) db.session.commit()getNewsList('http://www.dgut.edu.cn/dgut/ggyw/news_list.shtml')getNewsList('http://www.dgut.edu.cn/dgut/xydt/news_list.shtml')