写在前面:最近论文要用到的数据很多,涉及7对并置站从2010年到2023年的全部数据
今天鼓捣了一整天,想用快速方便的程序来整理整理,仅供参考
部分路径需要修改,取走用的时候注意看
首先不能随便动我的原始数据,dddd,好不容易下载完的
需求一:创建跟我的文件夹结构一致的新文件夹系列
这个很简单,用bat文件就好
分别创建了2010年到2023年的年份文件夹,每个年份文件夹下包含7组站点子文件夹,并在各站点文件夹内新建了示例文件,以便后续处理
@echo off
rem Build the year_YYYY\<SITE_PAIR> folder tree (2010-2023) in the current
rem directory and drop empty placeholder observation files into every
rem site-pair folder (day 001 and day 365 for each of the two stations).
rem
rem The script is safe to re-run: folders and files that already exist are
rem left untouched, so real data is never truncated by a placeholder.
setlocal enabledelayedexpansion
for /L %%Y in (2010,1,2023) do (
    rem y = last two digits of the year, used in the file extension (.10o, .11o, ...)
    set "y=%%Y"
    set "y=!y:~-2!"
    echo y value: !y!
    if not exist "year_%%Y" mkdir "year_%%Y"
    rem Abort instead of creating files in the wrong directory if the folder is unusable.
    pushd "year_%%Y" || (echo Cannot enter year_%%Y & pause & exit /b 1)
    for %%F in (BAKO_CIBG CHPI_CHPG KOKB_KOKV PTAG_PTGG YAR2_YARR YEL2_YELL THTI_THTG) do (
        echo Creating files in year_%%Y\%%F...
        if not exist "%%F" mkdir "%%F"
        pushd "%%F" || (echo Cannot enter year_%%Y\%%F & pause & exit /b 1)
        rem prefix = first station name (first 4 chars), suffix = second station name (last 4 chars)
        set "prefix=%%F"
        set "prefix=!prefix:~0,4!"
        set "suffix=%%F"
        set "suffix=!suffix:~-4!"
        rem "copy nul" creates a zero-byte file; skip names that already exist.
        if not exist "!prefix!0010.!y!o" copy nul "!prefix!0010.!y!o" > nul
        if not exist "!prefix!3650.!y!o" copy nul "!prefix!3650.!y!o" > nul
        if not exist "!suffix!0010.!y!o" copy nul "!suffix!0010.!y!o" > nul
        if not exist "!suffix!3650.!y!o" copy nul "!suffix!3650.!y!o" > nul
        popd
    )
    popd
)
echo Files created successfully!
pause
结果如图:
需求二:将下载完的数据分类整理
因为我之前弄过一部分,所以数据有点乱,这一段是我个人的数据调整,估计适用性不高,可以直接跳过。
主要目的是将站点文件夹内的下载数据再统一收到RINEX子文件夹中,方便下一步存入其他类型数据。
# 将所有文件归类存入RINEX文件夹中
import os
import shutil
def process_folder(folder_path):
    """Gather a site folder's observation files into its ``RINEX`` subfolder.

    The function performs three steps on ``folder_path``:

    1. Ensure a ``RINEX`` subfolder exists.  A legacy ``useful`` subfolder is
       renamed to ``RINEX``; when ``RINEX`` is already present, the contents
       of ``useful`` are merged into it instead (existing entries win) and
       ``useful`` is removed once it is empty.
    2. Move every regular file directly inside ``folder_path`` whose name
       ends with ``<yy>o`` into ``RINEX``, where ``<yy>`` is the last two
       characters of the parent (year) folder name.  Files that already
       exist in ``RINEX`` are never overwritten.
    3. Delete every ``.txt`` file directly inside ``folder_path``.

    Args:
        folder_path: Path of one site folder, e.g. ``.../year_2010/BAKO_CIBG``.
    """
    useful_folder_path = os.path.join(folder_path, 'useful')
    rinex_folder_path = os.path.join(folder_path, 'RINEX')

    if os.path.isdir(useful_folder_path):
        if os.path.exists(rinex_folder_path):
            # Renaming onto an existing folder fails, so merge entry by entry.
            for entry in os.listdir(useful_folder_path):
                target = os.path.join(rinex_folder_path, entry)
                if not os.path.exists(target):
                    shutil.move(os.path.join(useful_folder_path, entry), target)
            # Only drop 'useful' when nothing was left behind by a name clash.
            if not os.listdir(useful_folder_path):
                os.rmdir(useful_folder_path)
        else:
            os.rename(useful_folder_path, rinex_folder_path)
    elif not os.path.exists(rinex_folder_path):
        os.mkdir(rinex_folder_path)

    # normpath strips a trailing separator, which would otherwise make
    # dirname() return the site folder itself instead of the year folder.
    parent_folder_name = os.path.basename(
        os.path.dirname(os.path.normpath(folder_path))
    )
    desired_suffix = parent_folder_name[-2:] + 'o'

    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            # Leave subfolders (including RINEX itself) alone.
            continue
        if file_name.endswith(desired_suffix):
            new_file_path = os.path.join(rinex_folder_path, file_name)
            if not os.path.exists(new_file_path):
                shutil.move(file_path, new_file_path)
        elif file_name.endswith('.txt'):
            os.remove(file_path)
def main(base_path="F:\\test"):
    """Run ``process_folder`` on every existing site folder under ``base_path``.

    The expected layout is ``<base_path>/year_<YYYY>/<SITE_PAIR>`` for the
    years 2010 through 2023.  Site folders that do not exist are skipped.

    Args:
        base_path: Directory that contains the ``year_<YYYY>`` folders.
            Defaults to the author's working directory; pass ``os.getcwd()``
            when the script sits next to the year folders.
    """
    site_folders = (
        'BAKO_CIBG', 'CHPI_CHPG', 'KOKB_KOKV', 'PTAG_PTGG',
        'YAR2_YARR', 'YEL2_YELL', 'THTI_THTG',
    )
    for year in range(2010, 2024):
        year_folder = os.path.join(base_path, f'year_{year}')
        for subfolder in site_folders:
            subfolder_path = os.path.join(year_folder, subfolder)
            # isdir (not exists) so a stray file is never treated as a site folder.
            if os.path.isdir(subfolder_path):
                process_folder(subfolder_path)


if __name__ == "__main__":
    main()
需求三:正题开始,统计子文件夹文件个数,比较两类文件日期相同的个数,结果输出至Excel表保存
直接上代码和结果
# 文件夹内各文件的数量统计
import os
from openpyxl import Workbook
# Root directory that contains the year_<YYYY> folders.
base_directory = 'H:\\GPS_S4c\\data'

# Years to scan: 2010 through 2023 inclusive.
years = [*range(2010, 2024)]

# Site-pair folder names, each written as "<first station>_<second station>".
sites = [
    'YEL2_YELL',
    'KOKB_KOKV',
    'PTAG_PTGG',
    'BAKO_CIBG',
    'THTI_THTG',
    'CHPI_CHPG',
    'YAR2_YARR',
]

# Name of the subfolder inside each site-pair folder that is scanned.
rinex_folder = 'RINEX'
def year_str(year):
    """Return the folder name used for ``year``, e.g. ``2010 -> 'year_2010'``."""
    return "year_{}".format(year)
# Create the workbook and work on its default sheet.
wb = Workbook()
ws = wb.active

# Header row: a corner label followed by one column per year.
ws.append(['Site\\Year'] + [str(year) for year in years])

for site in sites:
    # The folder name is "<first>_<second>"; file names start with the
    # lower-cased station name.
    first_station = site[:4]
    second_station = site[5:]
    first_prefix = first_station.lower()
    second_prefix = second_station.lower()

    # Per-year file names of each station, and per-year count of shared days.
    files_count = {year: {'start': [], 'end': []} for year in years}
    consistent_count = {year: 0 for year in years}

    for year in years:
        site_folder = os.path.join(base_directory, year_str(year), site, rinex_folder)
        if not os.path.exists(site_folder):
            continue
        for file in os.listdir(site_folder):
            if file.startswith(first_prefix):
                files_count[year]['start'].append(file)
            if file.startswith(second_prefix):
                files_count[year]['end'].append(file)

    # One row of file counts per station.
    ws.append([first_station] + [len(files_count[year]['start']) for year in years])
    ws.append([second_station] + [len(files_count[year]['end']) for year in years])

    # Characters 4-6 of a file name are compared between the two stations;
    # the row holds how many distinct values both stations share each year.
    row_consistent = ['Consistent Count']
    for year in years:
        start_days = {name[4:7] for name in files_count[year]['start']}
        end_days = {name[4:7] for name in files_count[year]['end']}
        consistent_count[year] = len(start_days & end_days)
        row_consistent.append(consistent_count[year])
    ws.append(row_consistent)

    # Blank separator row between site pairs.
    ws.append([])

# Write the workbook next to the year folders.
excel_file_path = os.path.join(base_directory, 'summary.xlsx')
wb.save(excel_file_path)
需求四:最后将不匹配的日期单独摘出来放txt文件,后续需要二次下载筛选
我沿用了需求三的代码,有一点重复,但我懒得改了
# 缺失文件数量及具体日期统计,输出txt文件
import calendar
import os

from openpyxl import Workbook
# Root directory that contains the year_<YYYY> folders.
base_directory = 'H:\\GPS_S4c\\data'

# Years to scan: 2010 through 2023 inclusive.
years = [*range(2010, 2024)]

# Site-pair folder names, each written as "<first station>_<second station>".
sites = [
    'YEL2_YELL',
    'KOKB_KOKV',
    'PTAG_PTGG',
    'BAKO_CIBG',
    'THTI_THTG',
    'CHPI_CHPG',
    'YAR2_YARR',
]

# Name of the subfolder inside each site-pair folder that is scanned.
rinex_folder = 'RINEX'
def year_str(year):
    """Return the folder name used for ``year``, e.g. ``2010 -> 'year_2010'``."""
    return "year_{}".format(year)
def _expected_days(year):
    """Return the zero-padded day-of-year strings ('001', ...) for ``year``.

    Leap years get 366 entries so that a missing day 366 is reported too.
    """
    last_day = 366 if calendar.isleap(year) else 365
    return {str(day).zfill(3) for day in range(1, last_day + 1)}


# Day-of-year strings every year should contain; computed once, reused per site.
expected_days = {year: _expected_days(year) for year in years}

# Per-year missing days of the first / second station of the current site pair.
missing_chars_start = {year: set() for year in years}
missing_chars_end = {year: set() for year in years}

# Reports are written to <base_directory>/Missing.
missing_folder = os.path.join(base_directory, 'Missing')
os.makedirs(missing_folder, exist_ok=True)

for site in sites:
    # The folder name is "<first>_<second>"; file names start with the
    # lower-cased station name.
    first_prefix = site[:4].lower()
    second_prefix = site[5:].lower()

    # File names found per year for each station of the pair.
    files_count = {year: {'start': [], 'end': []} for year in years}

    for year in years:
        site_folder = os.path.join(base_directory, year_str(year), site, rinex_folder)
        if not os.path.exists(site_folder):
            # No folder at all: every expected day will be reported missing.
            continue
        for file in os.listdir(site_folder):
            if file.startswith(first_prefix):
                files_count[year]['start'].append(file)
            if file.startswith(second_prefix):
                files_count[year]['end'].append(file)

    # Characters 4-6 of a file name are treated as its day of year; whatever
    # expected day is not among them counts as missing.
    for year in years:
        missing_chars_start[year] = expected_days[year].difference(
            name[4:7] for name in files_count[year]['start']
        )
        missing_chars_end[year] = expected_days[year].difference(
            name[4:7] for name in files_count[year]['end']
        )

    # One report per station: "<year> <number missing> <sorted missing days>".
    reports = (
        (site[:4], missing_chars_start),
        (site[5:], missing_chars_end),
    )
    for station, missing_by_year in reports:
        with open(os.path.join(missing_folder, f"{station}_missing.txt"), 'w') as f:
            for year in years:
                f.write(f"{year} {len(missing_by_year[year])} {sorted(missing_by_year[year])}\n")
上传也给自己保存一下,辛苦一天总算还是有结果的,要是能帮助到一些人,那就再好不过了!
补:后面的代码都是Python写的