涉及一些常用代码：
- 批量读取文件；
- txt 文件读取；
- 生成时间序列；
- 将时间格式处理成“年-月-日”并按列排列；
- 矩阵拼接。
# -*- coding: utf-8 -*-
import glob
import os

import numpy as np
import pandas as pd
# station_id = pd.read_csv(r'C:\Users\ZhanLF\Desktop\华东站点.txt', header=None)
# Collect the daily maximum temperature of every listed station into one
# table: one row per data file (assumed to be one file per day), three leading
# year/month/day columns, then one column per station id.

STATION_FILE = 'name.txt'        # comma-separated station ids
DATA_PATTERN = '*.txt'           # data files to scan
OUTPUT_FILE = 'out.xlsx'
DATE_START = '20180101000000'
DATE_END = '20210731000000'


def select_data_files(paths, station_file):
    """Return the data files among *paths*, sorted, without *station_file*.

    The station-id list lives next to the data files and matches the same
    ``*.txt`` pattern, so it has to be filtered out explicitly.  ``glob``
    returns names in arbitrary order; sorting makes the row order
    deterministic.

    NOTE(review): lexicographic order is assumed to be chronological (e.g.
    date-stamped file names) -- confirm against the real file names.
    """
    def canonical(path):
        return os.path.normcase(os.path.normpath(path))

    excluded = canonical(station_file)
    return sorted(path for path in paths if canonical(path) != excluded)


def station_values(data, station_ids, value_col=8, fill_value=0.0):
    """Pick one value per station id out of a single file's table.

    Parameters
    ----------
    data : 2-D array whose column 0 holds station ids and whose column
        ``value_col`` holds the value to extract.
    station_ids : 1-D sequence of station ids, in output column order.
    value_col : index (within ``data``) of the value column.
    fill_value : value used for stations missing from ``data``.  The default
        ``0.0`` keeps the historical output, but it cannot be told apart from
        a real reading of 0; pass ``np.nan`` to make gaps visible.

    Returns
    -------
    1-D float64 array of ``len(station_ids)`` values.
    """
    row = np.full(len(station_ids), fill_value, dtype=np.float64)
    if data.size == 0:
        return row
    # Later rows overwrite earlier ones, i.e. the last match for an id wins.
    by_station = dict(zip(data[:, 0].tolist(), data[:, value_col].tolist()))
    for col, station_id in enumerate(station_ids):
        value = by_station.get(float(station_id))
        if value is not None:
            row[col] = value
    return row


def date_columns(time_series):
    """Return an (n, 3) float array of year, month, day for a DatetimeIndex."""
    return np.column_stack(
        [time_series.year, time_series.month, time_series.day]
    ).astype(np.float64)


def main():
    """Read every data file and write the combined table to ``OUTPUT_FILE``."""
    # ndmin=1 keeps a file with a single station id iterable.
    station_ids = np.loadtxt(STATION_FILE, dtype=np.int32, delimiter=',',
                             ndmin=1)
    files = select_data_files(glob.glob(DATA_PATTERN), STATION_FILE)

    # Generate the time series that labels the rows.
    time_series = pd.date_range(start=DATE_START, end=DATE_END)
    if len(files) != len(time_series):
        # Fail before the slow read loop instead of inside np.hstack.
        raise ValueError(
            'found %d data files but the date range %s..%s has %d days'
            % (len(files), DATE_START, DATE_END, len(time_series))
        )

    result = np.zeros([len(files), len(station_ids)])
    for row, file in enumerate(files):
        print('正在处理' + file)
        # ndmin=2 keeps a file with a single data row two-dimensional.
        # NOTE(review): float32 is kept from the original; values are
        # therefore rounded to float32 precision before being written out.
        data = np.loadtxt(file, dtype=np.float32, delimiter=' ', skiprows=3,
                          usecols=range(1, 10), encoding='utf-8', ndmin=2)
        result[row] = station_values(data, station_ids)

    # Put the date in the first three columns.
    output = np.hstack([date_columns(time_series), result])
    pd.DataFrame(output).to_excel(OUTPUT_FILE)


if __name__ == '__main__':
    main()