我个人是某一本普通大学计算机学院学生,最近在学习爬虫,用requests库和正则表达式爬取猫眼电影TOP100时,发现运行结果是两个中括号 【 】,希望大神指导一下,感激不尽!!!!
以下是我写的源代码!!
import requests from requests.exceptions import RequestException import re def get_one_page(url): headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36', } try: response=requests.get(url,headers=headers) if response.status_code==200: return response.text return None except RequestException: return None def parse_one_page(html): pattern=re.compile('<dd>.*?board-index.*?>(\d+)<i>.*?data-src="(.*?)".*?name"><a' +'.*?>(.*?)</a>.*?start">(.*?)</p>.*?releasetime">(.*?)</p>' +'.*?integer">(.*?)</i>.*?fraction">(.*?)</i>.*?<dd>',re.S) items=re.findall(pattern,html) print(items) def main(): url='http://maoyan.com/board/4?' html=get_one_page(url) parse_one_page(html) if __name__ == "__main__": main()