"""
宝可梦技能信息爬虫
"""
import requests
from bs4 import BeautifulSoup
from requests import Session
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from model.pokemon import Base, pokemon_skill
def skill_spider():
    """Scrape the 52poke wiki move list and store every move in the database.

    Downloads the move-list page, walks every table matched by
    ``.click-toggle table`` except the first one, and inserts one
    ``pokemon_skill`` row per usable table row. Each row is committed
    individually and a progress line is printed after each commit.

    Fields stored per move:
        skill_id:    move ID (first whitespace-separated token of the row)
        skill:       move name (second token)
        type:        type (6th token from the end)
        skill_sort:  category (5th token from the end)
        power:       power (4th token from the end)
        hit_rate:    accuracy (3rd token from the end)
        PP:          PP (2nd token from the end)
        description: description (last token)

    Raises:
        requests.HTTPError: if the wiki answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # Create the database engine.
    engine = create_engine('mysql+pymysql://root:123456@localhost/pokemon?charset=utf8mb4')
    # Create the tables declared on Base.
    Base.metadata.create_all(engine)

    # Page to scrape.
    url = 'https://wiki.52poke.com/wiki/%E6%8B%9B%E5%BC%8F%E5%88%97%E8%A1%A8'
    # Request headers.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
    }

    # Send the request and fetch the page.
    resp = requests.get(url, headers=headers, timeout=3)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # reporting success with zero rows stored.
    resp.raise_for_status()
    html = resp.content.decode()
    soup = BeautifulSoup(html, 'lxml')

    # One table per generation; the first matched table is skipped.
    table_list = soup.select('.click-toggle table')

    # Named so it does not shadow the `Session` imported from requests.
    session_factory = sessionmaker(bind=engine)
    session = session_factory()
    try:
        # Slice instead of `table_list.index(table) > 0`: index() is a linear,
        # equality-based search, so it is slow and would misplace a table that
        # compares equal to an earlier one.
        for table in table_list[1:]:
            rows = table.find_all('tr')
            # rows[0] is skipped, matching the original range(1, len(tr)).
            for row in rows[1:]:
                cells = row.text.split()
                # Drop rows that do not carry enough tokens to be a move row.
                # NOTE(review): with exactly 7 tokens cells[1] and cells[-6]
                # are the same token — confirm the threshold against the page.
                if len(cells) < 7:
                    continue

                # Leading fields are read from the front and trailing fields
                # from the back, so extra tokens in the middle are ignored.
                record = pokemon_skill(
                    skill_id=cells[0],
                    skill=cells[1],
                    type=cells[-6],
                    skill_sort=cells[-5],
                    power=cells[-4],
                    hit_rate=cells[-3],
                    PP=cells[-2],
                    description=cells[-1],
                )
                # Stage the row, then commit it right away.
                session.add(record)
                session.commit()
                print(f"====================技能{record.id}录入成功====================")
    finally:
        # Always close the session, even if a commit fails part-way through.
        session.close()

    print("====================全部技能录入成功====================")
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    skill_spider()