风、雨、气温图数据是以 jpg 图片的方式交换的,采用 RGB 三通道编码。
因为它的数据都是以文件服务的形式提供的,所以只要把路径分析好,之后就变成简单的下载文件业务了。
拼地址的代码是用java写的如下 :
import java.io.FileWriter;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Generates the list of Ventusky GFS wind-image URLs — one per product and per
 * 3-hour forecast step — between a begin and an end date (inclusive), and
 * writes them to gfs-wind-2018-now.txt, one URL per line.
 */
public class MakeURL {
    public static void main(String[] args) throws ParseException {
        String beginStr = "2018-01-01";//utc 8:00
        String endStr = "2023-03-08";//utc 23:00
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
        // sdf1 is split on "-" to obtain the URL path segments [yyyy, MM, dd, HH];
        // sdf2 produces the timestamp embedded in the file name.
        SimpleDateFormat sdf1 = new SimpleDateFormat("yyyy-MM-dd-HH");
        SimpleDateFormat sdf2 = new SimpleDateFormat("yyyyMMdd_HH");

        // Products: u/v wind components at 10 m / 100 m above ground, plus the
        // pressure levels offered by the site (950 hPa down to 10 hPa, in Pa).
        // Building the list from level arrays removes the duplicate 85000_pa
        // entries the original hand-written list contained (they produced
        // duplicate URLs and therefore duplicate downloads).
        List<String> prodList = new ArrayList<>();
        for (String height : new String[]{"10_m", "100_m"}) {
            prodList.add("vitr_u_" + height);
            prodList.add("vitr_v_" + height);
        }
        for (String pa : new String[]{"95000", "92500", "90000", "85000", "80000",
                "75000", "70000", "65000", "60000", "50000", "30000", "20000", "1000"}) {
            prodList.add("vitr_u_" + pa + "_pa");
            prodList.add("vitr_v_" + pa + "_pa");
        }

        long begin = dayFormat.parse(beginStr).getTime();
        // +24 h so the whole end day is included.
        long end = dayFormat.parse(endStr).getTime() + 1000 * 60 * 60 * 24L;
        // 3-hour forecast step (the original comment "One Day TimeStamp" was wrong).
        long stepMillis = 1000 * 60 * 60 * 3L;
        //https://data.ventusky.com/2023/03/09/gfs/whole_world/hour_00/gfs_vitr_u_95000_pa_20230309_00.jpg
        //https://data.ventusky.com/2023/03/09/gfs/whole_world/hour_00/gfs_vitr_v_95000_pa_20230309_00.jpg
        String urlTemplate = "https://data.ventusky.com/%s/%s/%s/%s/whole_world/hour_%s/%s_%s_%s.jpg";

        // try-with-resources closes the writer; FileWriter without append=true
        // already truncates the file, so the old explicit write("") was redundant.
        try (FileWriter writer = new FileWriter("gfs-wind-2018-now.txt")) {
            long current = begin;
            while (current <= end) {
                Date d = new Date(current);
                String[] split = sdf1.format(d).split("-"); // [yyyy, MM, dd, HH]
                String currentTime2 = sdf2.format(d);
                // Plain loop instead of forEach: lets IOException propagate to the
                // enclosing catch without wrapping it in a RuntimeException.
                for (String prod : prodList) {
                    String line = String.format(urlTemplate, split[0], split[1], split[2],
                            "gfs", split[3], "gfs", prod, currentTime2);
                    writer.write(line + "\n");
                }
                current += stepMillis;
            }
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
        /* Reference: the site's level selector (values kept verbatim):
        <select>
        <option value="wind-10m" style="font-weight: bold;">地上10米</option>
        <option value="wind-100m" style="">地上100米</option>
        <option value="wind-950hpa" style="">950百帕,500米</option>
        <option value="wind-925hpa" style="">925百帕,750米</option>
        <option value="wind-900hpa" style="">900百帕,1000米</option>
        <option value="wind-850hpa" style="font-weight: bold;">850百帕,1500米</option>
        <option value="wind-800hpa" style="">800百帕,2000米</option>
        <option value="wind-750hpa" style="">750百帕,2500米</option>
        <option value="wind-700hpa" style="font-weight: bold;">700百帕,3000米</option>
        <option value="wind-650hpa" style="">650百帕,3600米</option>
        <option value="wind-600hpa" style="">600百帕,4200米</option>
        <option value="wind-500hpa" style="font-weight: bold;">500百帕,5500米</option>
        <option value="wind-300hpa" style="font-weight: bold;">300百帕,9000米</option>
        <option value="wind-200hpa" style="">200百帕,12000米</option>
        <option value="wind-10hpa" style="">10 hPa, 30000 m</option>
        </select>
        */
    }
}
下载数据的代码是用 Python 写的,如下:
import os
import threading
from queue import Empty, Queue

import wget
url_queue = Queue()  # shared FIFO of URLs for the worker threads (NOTE: Queue() is unbounded; the original "length 50" comment was wrong)
# Open the URL list produced by the Java generator above, one URL per line.
fileHandler = open("gfs-wind-2018-now.txt", "r")
basePath = "D:\\data\\gfs-wind"  # local root directory for the downloaded images (Windows path)
# Get list of all lines in file
class Crawl_Thread(threading.Thread):
    """Worker thread that drains the shared URL queue and downloads each file.

    Files are stored under ``basePath``, mirroring the date path embedded in
    the URL (e.g. ``2023/03/09/gfs/whole_world/hour_00``). Files that already
    exist locally are skipped, so the crawl is resumable.
    """

    def __init__(self, thread_id, queue):
        # Initialize the parent Thread so start()/join() work.
        threading.Thread.__init__(self)
        self.thread_id = thread_id
        self.queue = queue  # shared task queue of URLs to download

    def run(self):
        """Entry point invoked by Thread.start()."""
        print('启动线程:', self.thread_id)
        self.crawl_spider()
        print('退出了该线程:', self.thread_id)

    def crawl_spider(self):
        """Pop URLs off the queue until it is empty, downloading each one."""
        while True:
            try:
                # get_nowait() fixes the empty()-then-get() race of the original:
                # with several workers the queue could drain between the two
                # calls, and the blocking get() would then hang that worker
                # forever.
                url = self.queue.get_nowait()
            except Empty:
                break  # queue drained -> this worker is done
            # readline() keeps the trailing "\n"; strip defensively so the
            # request URL and derived paths are well-formed.
            url = url.strip()
            print('当前工作的线程为:', self.thread_id, " 正在采集:", url)
            try:
                # Slice the path segment between the first "/20" (start of the
                # year directory) and the last "/" (start of the file name).
                i0 = url.find("/20")
                i1 = url.rfind("/")
                path = os.path.join(basePath, url[i0 + 1:i1])
                # exist_ok=True avoids the exists()/makedirs() race between workers.
                os.makedirs(path, exist_ok=True)
                fileName = wget.filename_from_url(url)
                full_path = os.path.join(path, fileName)
                if not os.path.exists(full_path):
                    wget.download(url, out=full_path)
            except Exception as e:
                # Best-effort: log and continue so one bad URL doesn't kill the worker.
                print(str(e))
                print("错误:" + url)
if __name__ == '__main__':
    # Load every URL (one per line) into the shared queue. strip() removes the
    # trailing "\n" that readline()/iteration keeps — the original pushed
    # newline-terminated URLs, producing malformed requests and paths.
    i = 0
    for line in fileHandler:
        print(i)
        i = i + 1
        url = line.strip()
        if url:  # skip blank lines
            url_queue.put(url)
    fileHandler.close()

    crawl_threads = []
    crawl_name_list = ['crawl_1', 'crawl_2', 'crawl_3', 'crawl_4', 'crawl_5', 'crawl_6', 'crawl_7',
                       'crawl_8']
    for thread_id in crawl_name_list:
        thread = Crawl_Thread(thread_id, url_queue)  # spawn a downloader worker
        thread.start()
        crawl_threads.append(thread)

    # join() alone is sufficient: each worker exits once the queue is drained.
    # The original `while not url_queue.empty(): pass` busy-wait pinned a CPU
    # core doing nothing and added no correctness — removed.
    for t in crawl_threads:
        t.join()
    print("结束")