1、使用python操作MySQL数据库
1.1、在 Python 中安装 PyMySQL(MySQLdb 的纯 Python 替代库)
pip3 install pymysql
1.2、连接数据库
import pymysql
# Open the connection used by the statements that follow.
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file.
db = pymysql.connect(host='localhost',
                     user='root',
                     password='0315',
                     # `database` is the documented keyword; `db` is only a
                     # deprecated alias for it.
                     database='test',
                     # Set explicitly so the Chinese text written below
                     # round-trips whatever the installed driver's default is.
                     charset='utf8mb4')
1.3、数据库基本操作
# All statements below share the single connection opened above. It is closed
# exactly once, after the last statement: closing it after each statement
# would leave the following ones without a live connection.
try:
    # Query: fetch and print every row of the student table. A failure here
    # propagates unchanged (the connection is still closed by `finally`).
    with db.cursor() as cursor:
        sql = "select * from student;"
        cursor.execute(sql)
        results = cursor.fetchall()
        for row in results:
            print(row)

    # Writes (insert, delete, update): each statement is committed on
    # success; on a database error it is rolled back and reported, and the
    # script carries on with the next statement.
    write_statements = (
        "insert into student values('110','jacob','男','1996-03-15','95031');",
        "delete from student where id='110';",
        "update student set name='王芳芳' where name='王芳';",
    )
    for sql in write_statements:
        with db.cursor() as cursor:
            try:
                cursor.execute(sql)
                db.commit()
            except pymysql.Error as e:
                db.rollback()
                print('statement rolled back: {!r}: {}'.format(sql, e))
finally:
    db.close()
2、爬取链家网站上租房信息
import requests
from bs4 import BeautifulSoup
import pymysql
import time
def get_db():
    """Open and return a connection to the local ``LianJia`` MySQL database.

    Returns:
        The connection object produced by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment or a config file.
    db = pymysql.connect(host='localhost',
                         user='root',
                         password='0315',
                         # `database` is the documented keyword; `db` is only
                         # a deprecated alias for it.
                         database='LianJia',
                         # Set explicitly so non-ASCII listing text
                         # round-trips whatever the driver's default is.
                         charset='utf8mb4')
    return db
def insert(db, house_info):
    """Insert one scraped listing into the ``house`` table and commit it.

    The values are passed to the driver as query parameters instead of being
    formatted into the SQL text, so quotes or other special characters in the
    scraped strings can neither break the statement nor inject SQL.

    Args:
        db: An open database connection (for example from ``get_db``).
        house_info: Mapping with the keys ``'价格'``, ``'租赁方式'``,
            ``'房屋类型'``, ``'朝向楼层'`` and ``'link'``.

    Raises:
        KeyError: If ``house_info`` lacks one of the keys above.
        Exception: Any error raised by the driver is re-raised after the
            transaction has been rolled back.
    """
    sql = (
        "insert into house "
        "(price,house_ways,house_type,house_towards,house_link) "
        "values (%s, %s, %s, %s, %s)"
    )
    keys = ('价格', '租赁方式', '房屋类型', '朝向楼层', 'link')
    # str() mirrors the text the previous '{}'.format(...) produced and turns
    # parser-specific string subclasses into plain strings for the driver.
    params = tuple(str(house_info[key]) for key in keys)

    try:
        with db.cursor() as cursor:
            # Show the statement exactly as it will be sent to the server.
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
        db.commit()
    except Exception:
        db.rollback()
        raise
def get_page(url, timeout=10):
    """Download ``url`` and return it parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The page body parsed with the ``lxml`` parser.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than handing an error page to the parser.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return soup
def get_links(link_url):
    """Collect the listing links found on the page at ``link_url``.

    Every ``<div class="content__list--item">`` on the page contributes the
    ``href`` of its first ``<a>`` element. Items that have no anchor, or
    whose anchor has no ``href``, are skipped.

    Args:
        link_url: Address of the page that lists the houses.

    Returns:
        A list of ``href`` strings in page order.
    """
    soup = get_page(link_url)
    links_div = soup.find_all('div', class_="content__list--item")
    links = []
    for div in links_div:
        anchor = div.a
        # `div.a` is None when the item contains no <a> element.
        href = anchor.get('href') if anchor is not None else None
        if href:
            links.append(href)
    return links
def get_house_info(house_url):
    """Scrape one listing page and return its details as a dict.

    Args:
        house_url: Full address of the listing page.

    Returns:
        A dict with the keys ``'价格'``, ``'租赁方式'``, ``'房屋类型'``,
        ``'朝向楼层'`` and ``'link'``. ``'价格'`` is characters 1 through 7
        of the text of the ``content__aside--title`` block; the next three
        values are the nodes that directly follow the 21st, 22nd and 23rd
        ``<span>`` of the page; ``'link'`` is ``house_url`` itself.

    Raises:
        ValueError: If the page has no ``content__aside--title`` block or
            fewer than 23 ``<span>`` elements.
    """
    soup = get_page(house_url)

    price_block = soup.find('div', class_='content__aside--title')
    if price_block is None:
        raise ValueError('price block not found on {}'.format(house_url))
    price = price_block.text[1:8]

    spans = soup.find_all('span')
    # NOTE(review): positions 20-22 are tied to the page layout at the time
    # of writing; presumably they are the labels that precede the rental
    # mode, house type and orientation/floor values - confirm against a
    # live page.
    if len(spans) < 23:
        raise ValueError(
            'expected at least 23 <span> elements on {}, found {}'.format(
                house_url, len(spans)))
    # `next_sibling` is the current attribute name; `nextSibling` is a
    # legacy alias for the same thing.
    house_ways = spans[20].next_sibling
    house_type = spans[21].next_sibling
    house_towards = spans[22].next_sibling

    info = {
        "价格": price,
        "租赁方式": house_ways,
        "房屋类型": house_type,
        "朝向楼层": house_towards,
        "link": house_url
    }
    return info
# Crawl the first listing page and store every house it links to.
db = get_db()
try:
    links = get_links('https://bj.lianjia.com/zufang/')
    for house_url in links:
        # The collected hrefs are joined onto the site root to form the
        # address of each listing page.
        house_info = get_house_info('https://bj.lianjia.com' + house_url)
        # Pause two seconds per listing (presumably to go easy on the site).
        time.sleep(2)
        insert(db, house_info)
finally:
    # Release the connection even when a request or an insert fails.
    db.close()