'''
Scrape book listings for the keyword "Python" from dangdang.com (first
results page only) and save them to a CSV file.
'''
import requests, re, csv
from bs4 import BeautifulSoup

# First-page search-results URL for the keyword "Python".
url = 'http://search.dangdang.com/?key=Python&act=input'
# Desktop Chrome User-Agent so the site serves the regular HTML page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
# CSV header row / per-book dict keys: title, link, current price, list price, discount.
title_list = ['书名', '链接', '现价', '定价', '折扣']
# Row accumulator: filled by parse(), flushed to disk by writer_csv().
datas = []
def parse():
    """Fetch the first dangdang result page and append one dict per book
    to the module-level ``datas`` list.

    Each dict uses the keys in ``title_list``:
    书名 (title), 链接 (link), 现价 (current price), 定价 (list price),
    折扣 (discount).
    """
    resp = requests.get(url=url, headers=headers)
    soup = BeautifulSoup(resp.text, 'lxml')
    book_list = soup.find('div', id='search_nature_rg').find_all('li')
    for book in book_list:
        # Hoist the repeated sub-tree lookups instead of re-querying the
        # same <a>/<p> for every field.
        pic = book.find('a', class_='pic')
        price = book.find('p', class_='price')
        discount_text = price.find('span', class_='search_discount').get_text()
        # The discount is embedded as "...(x.x折)". Guard the regex match:
        # the original called .group(1) unconditionally and raised
        # AttributeError for items without a parenthesized discount.
        match = re.search(r'.*\((.*)\)', discount_text)
        datas.append({
            '书名': pic.attrs['title'],
            # hrefs are protocol-relative ("//product.dangdang.com/...").
            '链接': 'http:' + pic.attrs['href'],
            '现价': price.find('span', class_='search_now_price').get_text(),
            '定价': price.find('span', class_='search_pre_price').get_text(),
            '折扣': match.group(1) if match else '',
        })
def writer_csv(rows=None, fieldnames=None, path='dangdang.csv'):
    """Append dict rows to a CSV file, writing the header row only once.

    Args:
        rows: iterable of dicts to write; defaults to the module-level
            ``datas`` list for backward compatibility.
        fieldnames: column order for the CSV; defaults to the module-level
            ``title_list``.
        path: output file path.
    """
    if rows is None:
        rows = datas
    if fieldnames is None:
        fieldnames = title_list
    with open(path, 'a', encoding='utf-8', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        # BUG FIX: the original wrote the header on every call while opening
        # in append mode, so re-running the script produced duplicate header
        # rows. In append mode tell() equals the current file size, so 0
        # means the file is new/empty and needs a header.
        if fp.tell() == 0:
            writer.writeheader()
        writer.writerows(rows)
def main():
    """Scrape the first dangdang results page, then persist it as CSV."""
    for step in (parse, writer_csv):
        step()
# Run the dangdang scraper when executed as a script. NOTE(review): a second
# script below redefines url/headers/main and has its own guard; both run
# when this file is executed directly.
if __name__ == '__main__':
    main()
# ---------------------------------------------------------------------------
# Second script: scrape hot-city travel info and save it to an .xlsx file.
# (The bare string below is the original script's header; it is a no-op
# expression statement, not this module's docstring.)
# ---------------------------------------------------------------------------
'''
获取热门城市旅游信息, 保存在xlsx中
'''
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Beijing one-day weather page; it carries the "hot spots" (hotSpot) list.
url = 'http://www.weather.com.cn/weather1d/101010100.shtml'
# Desktop Chrome User-Agent so the site serves the regular HTML page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
# Spreadsheet column headers: scenic spot, weather, temperature, travel index.
columns_name = [
    '景区',
    '天气',
    '气温',
    '旅游指数'
]
# NOTE(review): "vaules" is a typo for "values"; kept as-is because
# parse_html() and save_xlsx() reference this exact name.
# vaules[i] collects the column of data for columns_name[i].
vaules = [[] for _ in range(len(columns_name))]
def parse_html():
    """Fetch the weather page and fill the module-level ``vaules`` columns.

    Column i of ``vaules`` corresponds to ``columns_name[i]``:
    景区 (spot) / 天气 (weather) / 气温 (temperature) / 旅游指数 (travel index).
    """
    resp = requests.get(url=url, headers=headers)
    # The page is UTF-8 but requests may guess a wrong charset from the HTTP
    # headers; force UTF-8 so the Chinese text decodes correctly.
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'lxml')
    hot_city_list = soup.find('div', class_='hotSpot').find_all('li')
    # CSS class of the <span> holding each column's value, in column order.
    span_classes = ('name', 'weather', 'wd', 'zs')
    for city in hot_city_list:
        for column, cls in zip(vaules, span_classes):
            # BUG FIX: the original used .strip('') which strips nothing
            # (an empty char set); .strip() trims surrounding whitespace.
            column.append(city.find('span', class_=cls).get_text().strip())
def save_xlsx():
    """Write the collected hot-city columns to ``hotCity.xlsx``, sheet "info".

    Builds a DataFrame mapping each name in ``columns_name`` to the matching
    column list in ``vaules``.
    """
    df = pd.DataFrame(dict(zip(columns_name, vaules)))
    # index=False: the default would write a meaningless 0..N row-number
    # column into the spreadsheet.
    df.to_excel('hotCity.xlsx', sheet_name='info', index=False)
def main():
    """Drive the scrape-then-export pipeline for hot-city travel info."""
    for step in (parse_html, save_xlsx):
        step()
# Run the hot-city travel scraper when executed as a script (uses the
# most recent definition of main() at this point in the file).
if __name__ == '__main__':
    main()