最近在温习 Python 爬虫知识，写了个简单的爬虫，
能爬取 500 页相亲女的信息。
#coding=utf8
import requests
import re
import xlrd
import xlwt
import time
from bs4 import BeautifulSoup
# Browser-like identity sent with every request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36'

# Headers passed to requests.get() by girls.get_info().
# - Accept asks for HTML documents (the original value was the one a browser
#   sends when fetching a stylesheet).
# - Accept-Encoding lists only gzip/deflate: advertising "sdch" or "br" lets
#   the server answer with an encoding Requests may be unable to decode,
#   which would leave the response text garbled.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'User-Agent': user_agent,
}


class girls():
    """Scraper for the 19lou.com "love" listing pages (query ``sex=0``)."""

    # Listing URL; %d is replaced by the 1-based page number.
    _URL_TEMPLATE = "http://www.19lou.com/love/list-164-%d.html?sex=0"

    def get_info(self, first_page=1, last_page=500, timeout=10):
        """Download the listing pages and print every profile field.

        For each page, every ``<em>`` element found inside a
        ``<div class="list-details">`` is split on single spaces and each
        piece is printed on its own line.

        Args:
            first_page: Number of the first page to fetch.
            last_page: Number of the last page to fetch, inclusive. The
                default covers pages 1..500 as promised by the file's intro
                text (the previous ``range(1, 500)`` stopped at page 499).
            timeout: Seconds handed to ``requests.get``; without it a stalled
                connection would block the crawl indefinitely.

        Returns:
            None. Results are written to standard output.

        Raises:
            requests.RequestException: Network errors (including timeouts)
                raised by ``requests.get`` propagate to the caller.

        If ``first_page`` is greater than ``last_page`` nothing is fetched.
        """
        for page in range(first_page, last_page + 1):
            url = self._URL_TEMPLATE % page
            response = requests.get(url, headers=headers, timeout=timeout)
            # Name the parser explicitly so the result does not depend on
            # which optional parser libraries happen to be installed.
            soup = BeautifulSoup(response.text, 'html.parser')
            for details in soup.find_all('div', class_='list-details'):
                for em in details.find_all('em'):
                    # get_text() always returns a string, whereas .string is
                    # None when the <em> holds nested markup.
                    for info in em.get_text().split(' '):
                        # Single-argument print() behaves the same on
                        # Python 2 and Python 3.
                        print(info)
# Script tail: this runs at module level, so executing (or importing) the
# file starts the crawl immediately.
test = girls()
girls.get_info(test)  # same call as test.get_info(), spelled via the class