# Scrape Anjuke (安居客) broker listings
# coding=utf-8
import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
import pymysql
# Connect to the local MySQL server and (re)create the target table.
# NOTE(review): credentials are hardcoded in source — move to config/env.
# Keyword arguments are required: PyMySQL 1.0+ removed the MySQLdb-style
# positional-argument compatibility that the old call relied on.
db = pymysql.connect(host="localhost", user="root",
                     password="zgx675050748", database="RUNOOB",
                     charset='utf8')
cursor = db.cursor()
# Start from a clean table on every run (previous scrape is discarded).
cursor.execute("DROP TABLE IF EXISTS 安居客")
cursor.execute("""CREATE TABLE 安居客 (信息 varchar(1000))""")
# Crawl broker-list pages 1-4, extract each broker's info block, and store
# one row per broker in table 安居客. Commits once per page.
try:
    for page in range(1, 5):
        url = 'https://beijing.anjuke.com/tycoon/p' + str(page) + '/#'
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = {'User-Agent': user_agent}
        request = urllib.request.Request(url, headers=headers)
        # Context manager guarantees the HTTP response is closed even if
        # decoding fails.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('utf-8')
        soup = BeautifulSoup(content, 'html.parser')
        for tag in soup.find_all('div', {'class': 'jjr-info'}):
            # Strip spaces, collapse newlines to single spaces.
            text = tag.text.replace(' ', '').replace('\n', ' ')
            # Parameterized query: the scraped text may contain quotes,
            # which would both break and inject into a %-formatted
            # statement (the original bug).
            sql = "INSERT INTO 安居客(信息) VALUES (%s)"
            cursor.execute(sql, (text,))
            print(sql, text)
        db.commit()
except urllib.error.URLError as e:
    # HTTPError carries .code; plain URLError only has .reason.
    if hasattr(e, "code"):
        print(e.code)
    if hasattr(e, "reason"):
        print(e.reason)
finally:
    # Close the connection even if an unexpected exception escapes.
    db.close()