#!/usr/bin/env python
# -*- coding: utf-8 -*-
import random
import re
import unittest
from urllib.parse import unquote
from urllib.request import urlopen

from bs4 import BeautifulSoup
class TestWikipedia(unittest.TestCase):
    """Crawl Wikipedia articles and check basic properties of each page.

    Starting from the Monty Python article, the test follows randomly chosen
    article links. For every page visited it verifies that the ``<h1>`` title
    agrees with the title encoded in the URL and that the main content
    container is present.
    """

    # Parsed document of the page currently under test.
    bsObj = None
    # Absolute URL of the page currently under test.
    url = None

    def test_PageProperties(self):
        """Check title/URL agreement and content presence on each page."""
        self.url = "http://en.wikipedia.org/wiki/Monty_Python"
        # Test the first 100 pages encountered.
        for _ in range(100):
            # Close the HTTP response as soon as the document is parsed, and
            # name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            with urlopen(self.url) as response:
                self.bsObj = BeautifulSoup(response, "html.parser")
            titles = self.titleMatchesURL()
            self.assertEqual(titles[0], titles[1])
            self.assertTrue(self.contentExists())
            next_url = self.getNextLink()
            if next_url is None:
                # No followable article link on this page: stop crawling
                # instead of calling urlopen() with None.
                break
            self.url = next_url
        print("done!")

    def titleMatchesURL(self):
        """Return ``[page_title, url_title]``, both lower-cased.

        ``page_title`` is the text of the first ``<h1>`` on the current page.
        ``url_title`` is the part of the current URL after ``/wiki/`` with
        underscores turned into spaces and percent-escapes decoded.

        Raises:
            ValueError: if the current URL does not contain ``/wiki/``.
        """
        marker = "/wiki/"
        pageTitle = self.bsObj.find("h1").get_text()
        urlTitle = self.url[self.url.index(marker) + len(marker):]
        urlTitle = urlTitle.replace("_", " ")
        # Decode percent-encoded characters (e.g. "%27" -> "'").
        urlTitle = unquote(urlTitle)
        return [pageTitle.lower(), urlTitle.lower()]

    def contentExists(self):
        """Return True if the page has a ``div`` with id ``mw-content-text``."""
        content = self.bsObj.find("div", {"id": "mw-content-text"})
        return content is not None

    def getNextLink(self):
        """Return the absolute URL of a random article link on the page.

        Only links inside the ``mw-content-text`` container whose ``href``
        starts with ``/wiki/`` and contains neither ``:`` nor ``#`` are
        considered, which leaves out namespaced pages and in-page anchors.

        Returns:
            The chosen URL as a string, or None when the page offers no
            qualifying link.
        """
        container = self.bsObj.find("div", {"id": "mw-content-text"})
        if container is None:
            return None
        links = container.find_all("a", href=re.compile(r"^/wiki/[^:#]+$"))
        if not links:
            return None
        return "http://en.wikipedia.org" + random.choice(links).attrs["href"]
# Run the test suite only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()