Python 带界面爬取历史天气:可选城市,可指定时间段

网上有很多用 Python 爬取历史天气的例子,但一般都是纯代码,用起来不太方便。笔者为此做了一个图形界面:可以按省、市、区县选择城市,并可指定起止年份和月份(如 1-3 或 1,2,5),爬取对应时间段的数据。

先看界面和效果

不多说了,下面直接放上完整代码。如果觉得好用,麻烦点个赞。至于如何把程序打包成可执行文件,请自行搜索相关教程。

import tkinter as tk
from tkinter import  Button, Label, filedialog,scrolledtext, Entry, ttk, Frame, TOP, LEFT, RIGHT, X, HORIZONTAL
import requests
import re
import pandas as pd
from bs4 import BeautifulSoup
import time
import random
import os
from datetime import datetime
# Fetch province-level data
def get_province_data():
    """Download the province table and return it as ``{code: name}``.

    The table is extracted from a JavaScript file on tianqi.2345.com by
    matching statements of the form ``theProvs[<digits>] = "<name>";``.

    Returns:
        dict: province code (the digits, kept as a string) mapped to the
        province name, in the order the statements appear in the script.
        A code that appears more than once keeps its last name.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://tianqi.2345.com/tqpcimg/tianqiimg/theme4/js/global.js?v=20220613"
    # Without a timeout a stalled connection would block start-up forever.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of parsing an error page into an empty dict.
    response.raise_for_status()
    pattern = re.compile(r'theProvs\[(\d+)\] = "(.*?)";')
    # findall() yields (code, name) tuples, which dict() consumes directly.
    return dict(pattern.findall(response.text))

# Fetch city / county level data
def get_city_county_data():
    """Download and parse the per-province city and county tables.

    The data is extracted from a JavaScript file on tianqi.2345.com by
    matching statements of the form ``provqx[<digits>]=['...','...']``.
    Each quoted group is treated as one city: the group is split on ``|``
    into raw entries, and each raw entry is a ``-``-separated string.  The
    second field of the group's first entry is used as the city name.

    Returns:
        tuple: ``(city_data, county_data)`` where

        * ``city_data`` maps a province code to the list of its city names
          (duplicates removed, original order kept);
        * ``county_data`` maps a province code to a dict that maps a city
          name to a list of ``(raw_entries, city_name)`` tuples, where
          ``raw_entries`` is the list of raw entry strings of that group.
          The UI callbacks in this file read field 0 of a raw entry as the
          area code and field 1 as the display name.

    Raises:
        requests.RequestException: if the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://tianqi.2345.com/tqpcimg/tianqiimg/theme4/js/citySelectData2.js"
    # Without a timeout a stalled connection would block start-up forever.
    response = requests.get(url, timeout=10)
    # Fail loudly instead of parsing an error page into empty tables.
    response.raise_for_status()

    city_data = {}
    county_data = {}
    pattern = re.compile(r"provqx\[(\d+)\]=\['(.*?)'\]", re.DOTALL)
    for prov_code, cities_info in pattern.findall(response.text):
        city_data[prov_code] = []
        county_data[prov_code] = {}
        for city_info in cities_info.split(','):
            # Splitting on ',' leaves the quotes (and possibly whitespace)
            # that surrounded each group; remove both before parsing.
            raw_entries = city_info.strip().strip("'").split('|')
            head_fields = raw_entries[0].split('-')
            if len(head_fields) < 2:
                # Malformed or empty group: no name field to index by.
                continue
            city_name = head_fields[1]
            if city_name not in city_data[prov_code]:
                city_data[prov_code].append(city_name)
            county_data[prov_code].setdefault(city_name, []).append(
                (raw_entries, city_name)
            )
    return city_data, county_data

# Refresh the city drop-down
def update_city(event):
    """Repopulate the city combobox for the currently selected province.

    Used as the province combobox's ``<<ComboboxSelected>>`` handler and also
    called directly with ``None``; ``event`` itself is never read.

    When the province has at least one city, the city list is replaced, the
    first city is selected, and the county list and the output file name are
    refreshed.  When it has none, nothing is changed.

    Raises:
        IndexError: if no province in ``province_data`` carries the name
            currently shown in the province combobox.
    """
    chosen_province = province_cb.get()
    # Reverse lookup (name -> code); [0] takes the first matching code.
    matching_codes = [
        code for code, name in province_data.items() if name == chosen_province
    ]
    cities = city_data.get(matching_codes[0], [])
    if not cities:
        return
    city_cb['values'] = cities
    city_cb.current(0)
    update_county(None)
    update_output_filename()

# Refresh the county drop-down
def update_county(event):
    """Repopulate the county combobox for the selected province and city.

    Used as the city combobox's ``<<ComboboxSelected>>`` handler and also
    called directly with ``None``; ``event`` itself is never read.

    The display names are field 1 of each ``-``-separated raw entry stored
    for the city.  When entries exist, the first county is selected and the
    code label and output file name are refreshed; otherwise nothing changes.

    Raises:
        IndexError: if no province carries the name shown in the province
            combobox.
        KeyError: if the province code has no entry in ``county_data``.
    """
    chosen_province = province_cb.get()
    matching_codes = [
        code for code, name in province_data.items() if name == chosen_province
    ]
    province_code = matching_codes[0]
    chosen_city = city_cb.get()
    groups = county_data[province_code].get(chosen_city, [])
    if not groups:
        return
    # Only the first stored group is used; its first element is the list of
    # raw entry strings.
    raw_entries = groups[0][0]
    county_cb['values'] = [entry.split("-")[1] for entry in raw_entries]
    county_cb.current(0)
    show_code(None)
    update_output_filename()

# Show the county code
def show_code(event):
    """Display the area code of the selected county in ``label_code``.

    Used as the county combobox's ``<<ComboboxSelected>>`` handler and also
    called directly with ``None``; ``event`` itself is never read.

    The lookup is limited to the province and city currently selected and
    compares the display name (field 1 of the ``-``-separated raw entry)
    for equality.  A search over every province with a substring test would
    return the first unrelated entry whenever two places share a name or one
    name is contained in another entry.

    When a match is found the label is set to ``"区县代码: <code>"`` and the
    output file name is refreshed; when nothing matches, the label is left
    as it is.
    """
    selected_county = county_cb.get()
    if not selected_county:
        return

    selected_prov = province_cb.get()
    selected_city = city_cb.get()
    prov_codes = [k for k, v in province_data.items() if v == selected_prov]

    for prov_code in prov_codes:
        groups = county_data.get(prov_code, {}).get(selected_city, [])
        for county_info in groups:
            # county_info is (raw_entries, city_name).
            for county in county_info[0]:
                fields = county.split('-')
                if len(fields) > 1 and fields[1] == selected_county:
                    label_code.config(text=f"区县代码: {fields[0]}")
                    update_output_filename()
                    return

# Scrape one month of data
def crawl_html(county_code, year, month):
    """Fetch the history table for one area and one month.

    Args:
        county_code: area id sent as ``areaInfo[areaId]``.
        year: year sent as ``date[year]``.
        month: month sent as ``date[month]``.

    Returns:
        ``(year, month, df)`` where ``df`` is the first HTML table found in
        the ``"data"`` field of the JSON response, parsed by pandas; or
        ``None`` when the request fails, the server answers with an error
        status, or the response holds no usable table.  The reason is
        printed to stdout in every ``None`` case.
    """
    # Local import: only this function needs it.
    from io import StringIO

    url = "https://tianqi.2345.com/Pc/GetHistory"
    params = {
        'areaInfo[areaId]': county_code,
        'areaInfo[areaType]': 2,
        'date[year]': year,
        'date[month]': month
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://tianqi.2345.com/'
    }
    try:
        # Without a timeout one stalled request would freeze the whole crawl.
        response = requests.get(url, headers=headers, params=params, timeout=15)
    except requests.RequestException as exc:
        print(f"Request failed for {year}-{month}: {exc}")
        return None

    if response.status_code == 403:
        print(f"Access forbidden for {year}-{month}")
        return None
    if not response.ok:
        print(f"HTTP {response.status_code} for {year}-{month}")
        return None

    try:
        data = response.json()["data"]
        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated in recent pandas versions.
        tables = pd.read_html(StringIO(data))
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers both invalid JSON and "no tables found".
        print(f"No usable table for {year}-{month}: {exc}")
        return None
    return year, month, tables[0]

def parse_month_input(month_input):
    """Parse a month specification into a sorted list of unique months.

    The specification is a comma-separated list whose items are either a
    single month (``"5"``) or an inclusive range (``"1-3"``).  Whitespace
    around items is ignored, empty items (e.g. from a trailing comma) are
    skipped, and the full-width comma ``,`` is accepted as a separator.

    Args:
        month_input: the specification string, e.g. ``"1-3"`` or ``"1,2,5"``.

    Returns:
        list[int]: the months in ascending order without duplicates.

    Raises:
        ValueError: if an item is not an integer or a valid ``a-b`` range,
            if a range is reversed, if a month is outside 1-12, or if the
            specification contains no month at all.
    """
    months = set()
    # Chinese input methods produce the full-width comma; treat it the same.
    for part in month_input.replace(',', ',').split(','):
        part = part.strip()
        if not part:
            continue
        if '-' in part:
            start_month, end_month = map(int, part.split('-'))
            if start_month > end_month:
                # An empty range would otherwise be dropped silently.
                raise ValueError(f"reversed month range: {part!r}")
        else:
            start_month = end_month = int(part)
        # Checking only the bounds avoids iterating an absurdly large range.
        if start_month < 1 or end_month > 12:
            raise ValueError(f"month out of range (1-12): {part!r}")
        months.update(range(start_month, end_month + 1))
    if not months:
        raise ValueError("no month given")
    return sorted(months)

def start_scraping():
    """Crawl the requested months and save them to one Excel file.

    Handler of the start button.  Reads the start/end year, the month
    specification, the output path and the area code from the widgets,
    crawls every requested (year, month) with ``crawl_html``, and writes the
    concatenated tables with ``DataFrame.to_excel``.  Progress and errors
    are appended to ``text_box``; ``progress_bar`` is updated per month.

    Invalid input (non-numeric or reversed years, unparsable or out-of-range
    months, missing area code) is reported in ``text_box`` and aborts the
    run before any request is made.  A month that fails is reported and
    skipped so that the months already fetched are still saved.

    The call blocks until all months are processed, including a random
    1-3 second pause between requests.
    """
    def log(message):
        # Append one line and keep the newest line visible.
        text_box.insert(tk.END, message + "\n")
        text_box.see(tk.END)

    try:
        start_year = int(start_year_entry.get())
        end_year = int(end_year_entry.get())
    except ValueError:
        log("年份输入错误")
        return
    if end_year < start_year:
        log("年份输入错误")
        return

    month_input = month_entry.get().strip()
    if not month_input:
        log("月份输入错误")
        return
    try:
        months = parse_month_input(month_input)
    except ValueError:
        log("月份输入错误")
        return
    if not months or any(not 1 <= m <= 12 for m in months):
        log("月份输入错误")
        return

    # The label reads "区县代码: <code>" once a county is resolved and
    # "区县代码:" (no separator) before that, so partition() yields an empty
    # code in the unresolved case.
    county_code = label_code.cget("text").partition(": ")[2].strip()
    if not county_code:
        log("区县代码未找到")
        return

    # The entry holds a file path; ask for a folder only when it is empty.
    output_file = output_path_entry.get().strip()
    if not output_file:
        output_folder = filedialog.askdirectory()
        if not output_folder:
            return
        output_file = os.path.join(output_folder, "城市名_起始年份_结束年份.xlsx")

    # Only months strictly before the current calendar month are crawled.
    # NOTE(review): this skips the current month as well as future ones -
    # presumably because its data is still incomplete; confirm.
    now = datetime.now()
    targets = [
        (year, month)
        for year in range(start_year, end_year + 1)
        for month in months
        if (year, month) < (now.year, now.month)
    ]

    progress_bar['value'] = 0
    df_list = []
    for done, (year, month) in enumerate(targets, start=1):
        try:
            result = crawl_html(county_code, year, month)
        except (requests.RequestException, ValueError, KeyError) as exc:
            # One bad month must not discard the months already fetched.
            log(f"爬取失败:{year}年 {month}月 ({exc})")
            result = None
        else:
            if result is None:
                log(f"爬取失败:{year}年 {month}月")
            else:
                log(f"爬取:{year}年 {month}月")

        if result is not None:
            df_list.append(result[2])

        # Progress is measured against the months actually crawled.
        progress_bar['value'] = done / len(targets) * 100
        root.update_idletasks()
        if done < len(targets):
            # Random pause between requests; not needed after the last one.
            time.sleep(random.uniform(1, 3))

    if df_list:
        try:
            pd.concat(df_list).to_excel(output_file, index=False)
        except OSError as exc:
            # E.g. the target file is open elsewhere or the folder is missing.
            log(f"保存失败:{exc}")
        else:
            log("数据已保存")
    else:
        log("未能获取到任何数据")

    # Signal completion.
    log("抓取完成!")
    progress_bar['value'] = 100
    root.update_idletasks()

def update_output_filename(event=None):
    """Rewrite the output-path entry from the current inputs.

    Used as the ``<KeyRelease>`` handler of the year and month entries and
    called directly by other callbacks; ``event`` itself is never read.

    The file name is ``<county>_<start>_<end>.xlsx`` when a county is
    selected, both years are four characters long and the month entry is
    not empty; otherwise it is the placeholder name.  When the directory
    part of the text currently in the entry is an existing directory, that
    directory is kept and only the file name is replaced; otherwise the
    entry is replaced by the bare file name.

    Args:
        event: Tk event object when invoked through a binding, else ``None``.
    """
    placeholder = "城市名_起始年份_结束年份.xlsx"

    county_name = county_cb.get()
    start_year = start_year_entry.get()
    end_year = end_year_entry.get()
    month_input = month_entry.get()
    current_path = output_path_entry.get()

    if county_name and len(start_year) == 4 and len(end_year) == 4 and month_input:
        # County values are expected to look like "<prefix> <name>"; keep the
        # part after the first space, or the whole text when there is no
        # space (indexing [1] would raise IndexError in that case).
        display_name = county_name.split(' ', 1)[-1]
        file_name = f"{display_name}_{start_year}_{end_year}.xlsx"
    else:
        file_name = placeholder

    folder = os.path.dirname(current_path)
    if current_path and os.path.isdir(folder):
        # Keep the chosen folder; normalise separators to forward slashes.
        output_filename = os.path.join(folder, file_name).replace("\\", "/")
    else:
        output_filename = file_name

    output_path_entry.delete(0, tk.END)
    output_path_entry.insert(0, output_filename)

# Handler of the "choose output path" button
def browse_file():
    """Ask for an output folder and put the resulting file path in the entry.

    Does nothing when the dialog is cancelled.  Otherwise the entry is set
    to the chosen folder plus the placeholder file name, and
    ``update_output_filename`` then replaces the file-name part using the
    same rule as the ``<KeyRelease>`` bindings.  This keeps a single naming
    rule instead of a second one that can produce names such as
    ``<county>__.xlsx`` when the year entries are still empty.
    """
    folder_path = filedialog.askdirectory()
    if not folder_path:
        return
    placeholder_path = os.path.join(
        folder_path, "城市名_起始年份_结束年份.xlsx"
    ).replace("\\", "/")
    output_path_entry.delete(0, tk.END)
    output_path_entry.insert(0, placeholder_path)
    # The folder now exists in the entry, so only the file name is rewritten.
    update_output_filename()

# Main program

# Both lookups are performed before the window is created.
province_data = get_province_data()
city_data, county_data = get_city_county_data()

# Main window
root = tk.Tk()
root.title("抓取天气")
root.geometry("480x250")

# Province selector.
# The comboboxes are read-only: only listed values can be chosen, so the
# <<ComboboxSelected>> handlers always see the value that is displayed.
province_frame = tk.Frame(root)
province_frame.pack(fill=X, pady=(5, 0))
province_Label = Label(province_frame, text="省:")
province_Label.pack(side=LEFT, padx=(42, 0))
province_cb = ttk.Combobox(
    province_frame, values=list(province_data.values()), width=10, state="readonly"
)
province_cb.pack(side=LEFT, padx=(0, 5))
province_cb.bind("<<ComboboxSelected>>", update_city)
province_cb.current(0)

# City selector
city_Label = Label(province_frame, text="市:")
city_Label.pack(side=LEFT, padx=(42, 0))
city_cb = ttk.Combobox(province_frame, width=10, state="readonly")
city_cb.pack(side=LEFT, padx=(0, 5))
city_cb.bind("<<ComboboxSelected>>", update_county)

# County selector
county_Label = Label(province_frame, text="区县:")
county_Label.pack(side=LEFT, padx=(30, 0))
county_cb = ttk.Combobox(province_frame, width=10, state="readonly")
county_cb.pack(side=LEFT, padx=(0, 5))
county_cb.bind("<<ComboboxSelected>>", show_code)

# County code display
label_code = Label(province_frame, text="区县代码:", width=13)
label_code.pack(side=LEFT, padx=(4, 0))

# Start-year entry
date_frame = tk.Frame(root)
date_frame.pack(fill=X, pady=(5, 0))
start_year_Label = Label(date_frame, text="起始年份:")
start_year_Label.pack(side=LEFT, padx=(5, 0))
start_year_entry = Entry(date_frame, width=10)
start_year_entry.pack(side=LEFT, padx=(0, 5))
start_year_entry.bind("<KeyRelease>", update_output_filename)

# End-year entry
end_year_Label = Label(date_frame, text="结束年份:")
end_year_Label.pack(side=LEFT, padx=(5, 0))
end_year_entry = Entry(date_frame, width=10)
end_year_entry.pack(side=LEFT, padx=(0, 5))
end_year_entry.bind("<KeyRelease>", update_output_filename)

# Month entry
month_Label = Label(date_frame, text="月份(如1-3或1,2,5):")
month_Label.pack(side=LEFT, padx=(5, 0))
month_entry = Entry(date_frame, width=20)
month_entry.pack(side=LEFT, padx=(0, 5))
month_entry.bind("<KeyRelease>", update_output_filename)

# Output-path entry
output_frame = tk.Frame(root)
output_frame.pack(fill=X, pady=(5, 0))
output_path_Label = Label(output_frame, text="输出路径:")
output_path_Label.pack(side=LEFT, padx=(5, 0))
output_path_entry = Entry(output_frame, width=45)
output_path_entry.pack(side=LEFT, padx=(0, 0))
output_path_entry.insert(0, "城市名_起始年份_结束年份.xlsx")

# "Choose output path" button
browse_button = Button(output_frame, text="选择文件路径", command=browse_file, width=13)
browse_button.pack(side=LEFT, padx=(1, 5))

# Progress bar
progress_frame = tk.Frame(root)
progress_frame.pack(fill=X)
progress_Label = Label(progress_frame, text="进度:")
progress_Label.pack(side=LEFT, padx=(30, 0))
progress_bar = ttk.Progressbar(progress_frame, orient="horizontal", length=318, mode="determinate")
progress_bar.pack(side=LEFT, padx=(0, 0))

# Start button
start_button = Button(progress_frame, text="开始抓取", command=start_scraping, width=13)
start_button.pack(side=LEFT, padx=(1, 5))

# Scrolling text box used as the log
text_box_frame = tk.Frame(root)
text_box_frame.pack(fill=X, padx=(5, 0))
text_box = scrolledtext.ScrolledText(text_box_frame)
text_box.pack(side=LEFT, pady=(5, 5))

# Populate the city and county selectors for the initially selected province.
update_city(None)

root.mainloop()

  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值