#coding=utf-8
from bs4 import BeautifulSoup
import urllib2
import re
def searchurl(url):
    """Download ``url`` and return its content parsed by BeautifulSoup.

    Args:
        url: Address of the page to fetch; passed unchanged to
            ``urllib2.urlopen``.

    Returns:
        The ``BeautifulSoup`` object built from the HTTP response.

    Raises:
        Whatever ``urllib2.urlopen`` raises; no error is handled here.
    """
    response = urllib2.urlopen(url)
    try:
        # BeautifulSoup reads the file-like response inside its constructor,
        # so the connection can be released as soon as parsing is done.
        # NOTE(review): no parser is named, so bs4 uses whichever parser is
        # installed; consider naming one explicitly for reproducible results.
        return BeautifulSoup(response)
    finally:
        response.close()
def search163(soup, divs, di):
    """Print and collect the links found inside the first ``divs`` match.

    Args:
        soup: Parsed document exposing ``select`` (the script passes the
            result of ``searchurl``).
        divs: CSS selector of the container; only its first match is used.
        di: CSS selector, applied inside that container, for the link tags.

    Returns:
        list: The ``href`` value of every matched tag that has one, in the
        order ``select`` returned them. Empty when ``divs`` matches nothing.
    """
    containers = soup.select(divs)
    if not containers:
        # No container on the page: report "no links" instead of failing
        # with IndexError on containers[0].
        return []

    hrefs = []
    for link in containers[0].select(di):
        href = link.get("href")
        if href is None:
            # Tags without an href carry no link to follow; skip them rather
            # than aborting the whole listing with KeyError.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3; the
        # unicode label keeps label and title in one consistent encoding.
        print(u"标题: %s" % link.get_text())
        print(u"标题连接: %s" % href)
        hrefs.append(href)
    return hrefs
def searchhold(i, divs, di):
    """Fetch every URL in ``i`` and print the text selected from each page.

    Args:
        i: Iterable of page URLs (the disabled call in the entry point
            passes the list returned by ``search163``).
        divs: CSS selector of the container, forwarded to ``search66``.
        di: CSS selector of the elements inside the container, forwarded to
            ``search66``.

    Returns:
        None. All output is printed by ``search66``.
    """
    for page_url in i:
        # search66 prints what it finds and returns nothing, so there is no
        # result worth keeping in a local variable.
        search66(searchurl(page_url), divs, di)
def search66(soup, divs, di):
    """Print the whitespace-free text of each ``di`` element in the first ``divs`` match.

    Args:
        soup: Parsed document exposing ``select``.
        divs: CSS selector of the container; only its first match is used.
        di: CSS selector, applied inside that container, for the elements
            whose text is printed.

    Returns:
        None. One line is printed per matched element; nothing is printed
        when ``divs`` matches nothing.
    """
    containers = soup.select(divs)
    if not containers:
        # A page without the container has nothing to print; returning here
        # avoids IndexError on containers[0].
        return

    for node in containers[0].select(di):
        # The regex removes every character matched by \s; strip() then trims
        # any leading/trailing whitespace the regex did not match.
        print(re.sub(r'\s', '', node.get_text()).strip())
if __name__ == '__main__':
    # Script entry point: download the news index page, then print and
    # collect the links found inside its ".today_news" container.
    front_page = searchurl("http://news.163.com/world/")
    article_links = search163(front_page, ".today_news", "a")
    # Second stage, currently disabled: print the body text of each article.
    # searchhold(article_links, "#endText", "p")
from bs4 import BeautifulSoup
import urllib2
import re
# NOTE(review): this redefines searchurl, which is already defined earlier in
# this file; the script body appears to have been pasted twice.
def searchurl(url):
    """Download ``url`` and return its content parsed by BeautifulSoup.

    Args:
        url: Address of the page to fetch; passed unchanged to
            ``urllib2.urlopen``.

    Returns:
        The ``BeautifulSoup`` object built from the HTTP response.

    Raises:
        Whatever ``urllib2.urlopen`` raises; no error is handled here.
    """
    response = urllib2.urlopen(url)
    try:
        # BeautifulSoup reads the file-like response inside its constructor,
        # so the connection can be released as soon as parsing is done.
        # NOTE(review): no parser is named, so bs4 uses whichever parser is
        # installed; consider naming one explicitly for reproducible results.
        return BeautifulSoup(response)
    finally:
        response.close()
# NOTE(review): this redefines search163, which is already defined earlier in
# this file; the script body appears to have been pasted twice.
def search163(soup, divs, di):
    """Print and collect the links found inside the first ``divs`` match.

    Args:
        soup: Parsed document exposing ``select`` (the script passes the
            result of ``searchurl``).
        divs: CSS selector of the container; only its first match is used.
        di: CSS selector, applied inside that container, for the link tags.

    Returns:
        list: The ``href`` value of every matched tag that has one, in the
        order ``select`` returned them. Empty when ``divs`` matches nothing.
    """
    containers = soup.select(divs)
    if not containers:
        # No container on the page: report "no links" instead of failing
        # with IndexError on containers[0].
        return []

    hrefs = []
    for link in containers[0].select(di):
        href = link.get("href")
        if href is None:
            # Tags without an href carry no link to follow; skip them rather
            # than aborting the whole listing with KeyError.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3; the
        # unicode label keeps label and title in one consistent encoding.
        print(u"标题: %s" % link.get_text())
        print(u"标题连接: %s" % href)
        hrefs.append(href)
    return hrefs
# NOTE(review): this redefines searchhold, which is already defined earlier in
# this file; the script body appears to have been pasted twice.
def searchhold(i, divs, di):
    """Fetch every URL in ``i`` and print the text selected from each page.

    Args:
        i: Iterable of page URLs (the disabled call in the entry point
            passes the list returned by ``search163``).
        divs: CSS selector of the container, forwarded to ``search66``.
        di: CSS selector of the elements inside the container, forwarded to
            ``search66``.

    Returns:
        None. All output is printed by ``search66``.
    """
    for page_url in i:
        # search66 prints what it finds and returns nothing, so there is no
        # result worth keeping in a local variable.
        search66(searchurl(page_url), divs, di)
# NOTE(review): this redefines search66, which is already defined earlier in
# this file; the script body appears to have been pasted twice.
def search66(soup, divs, di):
    """Print the whitespace-free text of each ``di`` element in the first ``divs`` match.

    Args:
        soup: Parsed document exposing ``select``.
        divs: CSS selector of the container; only its first match is used.
        di: CSS selector, applied inside that container, for the elements
            whose text is printed.

    Returns:
        None. One line is printed per matched element; nothing is printed
        when ``divs`` matches nothing.
    """
    containers = soup.select(divs)
    if not containers:
        # A page without the container has nothing to print; returning here
        # avoids IndexError on containers[0].
        return

    for node in containers[0].select(di):
        # The regex removes every character matched by \s; strip() then trims
        # any leading/trailing whitespace the regex did not match.
        print(re.sub(r'\s', '', node.get_text()).strip())
if __name__ == '__main__':
    # NOTE(review): an identical entry-point guard appears earlier in this
    # file, so running the script performs the fetch and listing twice.
    # Script entry point: download the news index page, then print and
    # collect the links found inside its ".today_news" container.
    front_page = searchurl("http://news.163.com/world/")
    article_links = search163(front_page, ".today_news", "a")
    # Second stage, currently disabled: print the body text of each article.
    # searchhold(article_links, "#endText", "p")