from bs4 import BeautifulSoup
import requests
import codecs
import os
import re
# User-Agent string identifying the client as desktop Chrome 55 on Windows;
# get_url_list() hands this dict to requests.get().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}
def get_url_list(url):
    """Fetch *url* and print the text of its ``<div id="content">`` element.

    Args:
        url: Address of the page to download.

    Returns:
        None. The extracted text is written to standard output; nothing is
        printed when the page contains no ``div`` with id ``content``.
    """
    # The second positional parameter of requests.get() is ``params``, so the
    # header dict must be passed by keyword; otherwise it is encoded into the
    # query string and the User-Agent header is never sent.
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'lxml')
    content = soup.find('div', attrs={'id': 'content'})
    if content is None:
        # find() returns None when no matching tag exists; avoid calling
        # get_text() on it.
        return
    print(content.get_text())
if __name__ == '__main__':
    # Script entry point: fetch one fixed page and print its content text.
    url = 'http://www.xinshubao.net/22/22070/3179772_2.html'
    url_list = get_url_list(url)
# Getting the text content of the page this way is very simple.