Crawler Task 1

Scraping Xi'an Weather Data

The scripts below pull the 2019 monthly history pages for Xi'an from lishi.tianqi.com, write the per-day fields to 天气.txt as comma-separated lines, and then convert that text file to CSV.

Code

# -*- coding: utf-8 -*-
"""
Created on Wed Mar 31 22:19:37 2021

@author: ASUS
"""
import requests
from lxml import etree

if __name__ == "__main__":
    # Monthly history pages, e.g. https://lishi.tianqi.com/xian/201901/
    url = 'https://lishi.tianqi.com/xian/%d/'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    fp = open('./天气.txt', 'w', encoding='utf-8')
    # The twelve months of 2019: 201901 .. 201912
    for month in range(201901, 201913):
        new_url = url % month
        # .content (bytes) lets lxml detect the page encoding itself
        page_text = requests.get(url=new_url, headers=headers).content
        tree = etree.HTML(page_text)
        # Every per-day field <div> inside the tian_three block, in document order
        div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
        xian_weathers = []
        i = 0
        for div in div_list:
            xian_weather = div.xpath('./text()')[0]
            i += 1
            if i == 5:
                # Every fifth field ends one day's record
                i = 0
                xian_weathers.append(xian_weather + '\n')
            else:
                xian_weathers.append(xian_weather + ',')
        fp.write(''.join(xian_weathers))
    fp.close()
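The XPath //div[@class="tian_three"]/ul/li/div returns every per-day field <div> in one flat list, and the i == 5 counter re-groups them five to a line. Below is a minimal offline sketch of that same re-grouping, run against a hand-written HTML fragment that only mimics the assumed page layout (it is not the site's real markup), so the parsing logic can be sanity-checked without any network request.

from lxml import etree

# Hypothetical fragment that only mimics the assumed layout of the
# "tian_three" block; the real page markup may differ.
sample_html = '''
<div class="tian_three">
  <ul>
    <li>
      <div>2019-01-01</div><div>5℃</div><div>-4℃</div><div>晴</div><div>东北风 2级</div>
    </li>
    <li>
      <div>2019-01-02</div><div>3℃</div><div>-5℃</div><div>多云</div><div>西北风 3级</div>
    </li>
  </ul>
</div>
'''

tree = etree.HTML(sample_html)
divs = tree.xpath('//div[@class="tian_three"]/ul/li/div')
# Re-group the flat list of field <div>s into rows of five, one row per day,
# exactly as the i == 5 counter does in the script above
rows = [divs[k:k + 5] for k in range(0, len(divs), 5)]
for row in rows:
    print(','.join(d.xpath('./text()')[0] for d in row))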

CSV code

# -*- coding: utf-8 -*-
"""
Created on Wed Mar 31 22:19:37 2021

@author: ASUS
"""
import requests
from lxml import etree
import csv
import sys

if __name__ == "__main__":
    url = 'https://lishi.tianqi.com/xian/%d/'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    fp = open('./天气.txt', 'w', encoding='utf-8')
    for month in range(201901, 201913):
        new_url = url % month
        # .content (bytes) lets lxml detect the page encoding itself
        page_text = requests.get(url=new_url, headers=headers).content
        tree = etree.HTML(page_text)
        div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
        xian_weathers = []
        i = 0
        for div in div_list:
            xian_weather = div.xpath('./text()')[0]
            i += 1
            if i == 5:
                i = 0
                xian_weathers.append(xian_weather + '\n')
            else:
                xian_weathers.append(xian_weather + ',')
        fp.write(''.join(xian_weathers))
    # Close the file so the data is on disk before it is copied below
    fp.close()

# Copy the comma-separated text file given as argv[1] into the CSV file given as argv[2]
input_file = sys.argv[1]
output_file = sys.argv[2]

with open(input_file, 'r', newline='', encoding='utf-8') as csv_in_file:
    with open(output_file, 'w', newline='', encoding='utf-8') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)
        for row_list in filereader:
            filewriter.writerow(row_list)
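As a side note, the detour through 天气.txt is not strictly necessary: the grouped per-day fields could be handed straight to csv.writer. The sketch below assumes the same five-fields-per-day rows as the loop above; the helper name write_rows_to_csv, the output file name, and the header labels are illustrative assumptions, not part of the original script.

import csv

def write_rows_to_csv(rows, path='天气_direct.csv'):
    """Write pre-grouped day records (lists of five field strings) to a CSV file."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # Assumed meanings of the five scraped fields
        writer.writerow(['日期', '最高气温', '最低气温', '天气', '风向'])
        writer.writerows(rows)

# Example call with a single placeholder record
write_rows_to_csv([['2019-01-01', '5℃', '-4℃', '晴', '东北风 2级']])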
            

# -*- coding: utf-8 -*-
"""
Created on Wed Mar 31 22:19:37 2021

@author: ASUS
"""
import requests
from lxml import etree
import numpy as np
import pandas as pd

if __name__ == "__main__":
    url = 'https://lishi.tianqi.com/xian/%d/'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    fp = open('./天气.txt', 'w', encoding='utf-8')
    for month in range(201901, 201913):
        new_url = url % month
        # .content (bytes) lets lxml detect the page encoding itself
        page_text = requests.get(url=new_url, headers=headers).content
        tree = etree.HTML(page_text)
        div_list = tree.xpath('//div[@class="tian_three"]/ul/li/div')
        xian_weathers = []
        i = 0
        for div in div_list:
            xian_weather = div.xpath('./text()')[0]
            i += 1
            if i == 5:
                i = 0
                xian_weathers.append(xian_weather + '\n')
            else:
                xian_weathers.append(xian_weather + ',')
        fp.write(''.join(xian_weathers))
    # Close (and flush) the text file before reading it back
    fp.close()

    # The fields are text, not numbers, so loadtxt needs dtype, delimiter and encoding
    txt = np.loadtxt('天气.txt', dtype=str, delimiter=',', encoding='utf-8')
    txtDF = pd.DataFrame(txt)
    txtDF.to_csv('天气.csv', index=False)
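Since pandas is imported anyway, an alternative to np.loadtxt is to let pandas read the comma-separated text file directly. A minimal sketch follows; the column names are assumed labels for the five scraped fields rather than anything taken from the site.

import pandas as pd

# Read the comma-separated 天气.txt straight into a DataFrame; the column
# names are assumed labels for the five scraped fields.
df = pd.read_csv('天气.txt', header=None, encoding='utf-8',
                 names=['日期', '最高气温', '最低气温', '天气', '风向'])
df.to_csv('天气.csv', index=False, encoding='utf-8')
print(df.head())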