import os
import time
import datetime
import struct
import numpy as np
import xarray as xr
import time
from interval import Interval
import json
import requests
# Archive the browser is expected to download; init_browser() polls until this
# file exists and unzip() then extracts and deletes it.
Load_Path = "C:\\Users\\iscas\\Downloads\\信件打包.zip"
Eml_Path = "./NewMail" # folder the archive is unzipped into and then scanned for e-mail files
Annex_Path = "./NewRKW" # folder the attachments extracted from the e-mails are written to
AWT_Path = "./NewAWT1" # parent folder of the per-attachment directories handed to CdllRun
NC_Path = r"Z:\weather\212-data\BVS2" # root folder the decoded directories and raw attachments are copied to
def mkdir(path):
    """Create directory *path*, including missing parents, if it is absent.

    Surrounding whitespace and trailing backslashes are stripped from *path*
    before it is used.

    Returns:
        True if this call created the directory, False if something already
        existed at that path.
    """
    path = path.strip().rstrip("\\")
    try:
        # Attempt the creation directly instead of testing first, so a
        # directory that appears between the test and the call cannot crash us.
        os.makedirs(path)
    except FileExistsError:
        return False
    return True
class AwtHead:
    """Header fields and payload of one AWT grid file, filled in by decode_File().

    The class-level values are defaults; ``__init__`` gives every instance its
    own ``data`` list so instances never share one mutable object.
    """

    jdLon = 0        # header short 0; decode_File uses jdLon / 100 as the longitude step
    jdLat = 0        # header short 1; decode_File uses jdLat / 100 as the latitude step
    time = 0         # decode_File stores a one-element datetime64[ns] array here
    maxlat = 0       # header short 3
    maxlon = 0       # header short 6
    minlat = 0       # header short 4
    minlon = 0       # header short 5
    rowCount = 0     # number of grid rows, derived from the latitude range and step
    columnCount = 0  # number of grid columns, derived from the longitude range and step
    type = "none"    # product name (e.g. "wind"); set by the caller before decoding
    data = []        # class-level default only; instances get their own list below

    def __init__(self):
        # A fresh list per instance: mutating one header's data must not leak
        # into every other header through the shared class attribute.
        self.data = []
def decode_File(input_file, head, outputdir):
    """Decode one binary grid file and write it out through write_data_to_nc().

    The file starts with seven native shorts (14 bytes): longitude step,
    latitude step, one unused value, max latitude, min latitude, min longitude
    and max longitude. The rest is native float32 values, 1, 2 or 3 per grid
    cell depending on ``head.type``.

    The timestamp comes from the file name: the second-to-last ``_``-separated
    piece is parsed as ``YYYYMMDD`` and the first two characters of the last
    piece as the hour.

    Args:
        input_file: path of the file to decode.
        head: AwtHead-like object; ``head.type`` must already be set. Its other
            fields are filled in by this function.
        outputdir: root output folder; a sub-folder named after ``head.type``
            is created inside it.

    Raises:
        ValueError: if the date/hour cannot be parsed from the file name.
        struct.error: if the payload size does not match the grid size.
    """
    # Number of float32 values stored per grid cell for each product type.
    values_per_cell = {
        "vesselicing": 1, "pressure": 1, "height500mb": 1, "sst": 1,
        "visibility": 1, "wxtype": 1, "cloud": 1, "temperature": 1, "rhum": 1,
        "wind": 2, "current": 2,
        "wave": 3, "seas": 3, "swell": 3,
    }

    # Read through a context manager so the handle is closed promptly.
    with open(input_file, 'rb') as stream:
        data = stream.read()
    if len(data) < 14:
        print("文件格式不符合")
        return

    shortlist = struct.unpack('h' * 7, data[0:14])
    head.jdLon = shortlist[0]
    head.jdLat = shortlist[1]

    name_parts = input_file.split('_')
    if len(name_parts) < 2:
        raise ValueError(
            "cannot parse date/hour from file name: %r" % (input_file,))
    filetime = datetime.datetime.strptime(
        name_parts[-2] + name_parts[-1][:2] + ':00:00', "%Y%m%d%H:%M:%S")
    head.time = np.array([filetime]).astype('datetime64[ns]')

    head.maxlat = shortlist[3]
    head.minlat = shortlist[4]
    head.minlon = shortlist[5]
    head.maxlon = shortlist[6]
    # Steps are stored multiplied by 100, hence the factor in the cell counts.
    head.rowCount = int((head.maxlat - head.minlat) * 100 / head.jdLat + 1)
    head.columnCount = int((head.maxlon - head.minlon) * 100 / head.jdLon + 1)
    lat, lon = get_latlon(head.jdLon, head.jdLat,
                          head.maxlon, head.maxlat, head.minlon, head.minlat)

    outputdir = os.path.join(outputdir, head.type)
    mkdir(outputdir)

    var_nums = values_per_cell.get(head.type)
    if var_nums is None:
        # Unknown product type: nothing is decoded or written.
        return

    cell_count = int(head.rowCount * head.columnCount)
    head.data = struct.unpack('f' * (var_nums * cell_count), data[14:])
    arrays = get_values(head.data, head.rowCount, head.columnCount,
                        var_nums=var_nums)
    # Hand the 1-3 arrays over as da1, da2, da3 in order.
    write_data_to_nc(input_file, head.time, head.rowCount, head.columnCount,
                     head.type, lat, lon, outputdir, var_nums=var_nums,
                     **dict(zip(("da1", "da2", "da3"), arrays)))
    return
def get_latlon(jdLon, jdLat, maxlon, maxlat, minlon, minlat):
    """Build 2-D float32 latitude and longitude grids for a regular grid.

    Latitude runs from *maxlat* down to *minlat* along the first axis and
    longitude from *minlon* up to *maxlon* along the second. The steps are
    ``jdLat / 100`` and ``jdLon / 100``.

    Returns:
        ``(lat_2d, lon_2d)``, both shaped (rows, columns).
    """
    lat_step = -jdLat / 100.0
    lon_step = jdLon / 100.0
    # The 0.01 margin keeps the far end point inside the half-open arange.
    lat_axis = np.arange(maxlat, minlat - 0.01, lat_step).astype(np.float32)
    lon_axis = np.arange(minlon, maxlon + 0.01, lon_step).astype(np.float32)
    grids = np.meshgrid(lon_axis, lat_axis)
    return grids[1], grids[0]
def get_values(data, rowCount, columnCount, var_nums=3):
    """Split an interleaved flat sequence into *var_nums* 2-D float32 grids.

    Value ``k`` of every cell is stored at positions ``k, k + var_nums, ...``
    of *data*. Each returned array has shape (rowCount, columnCount).

    Returns:
        A list of *var_nums* arrays, in storage order.
    """
    grid_shape = (rowCount, columnCount)
    return [
        np.array(data[offset::var_nums], dtype=np.float32).reshape(grid_shape)
        for offset in range(var_nums)
    ]
def write_data_to_nc(in_file, headtime, headrowCount, headcolumnCount, headtype, lat, lon, outputdir, var_nums=1, da1=None, da2=None, da3=None):
    """Write one decoded grid to ``<outputdir>/<basename(in_file)>.nc``.

    The dataset holds a ``time`` coordinate, index coordinates ``xlat`` and
    ``xlong`` (0..n-1 as float32), 2-D ``lat``/``lon`` variables, and one to
    three data variables named after *headtype*:

    * ``var_nums == 1``: ``<headtype>`` (``temperature_2m`` for temperature)
    * ``var_nums == 2``: ``u10_wind``/``v10_wind`` or ``u_current``/``v_current``
    * ``var_nums == 3``: ``<headtype>_height``, ``_direction``, ``_period``

    Any other *var_nums* writes the coordinates only.

    Args:
        in_file: source file path; only its base name is used.
        headtime: values for the ``time`` coordinate.
        headrowCount, headcolumnCount: grid size.
        headtype: product name used to derive variable names.
        lat, lon: 2-D arrays on the (xlat, xlong) grid.
        outputdir: folder the file is written to.
        var_nums: number of data variables to write.
        da1, da2, da3: the data arrays, used in that order.

    Raises:
        ValueError: if ``var_nums == 2`` and *headtype* is neither "wind" nor
            "current" (no variable names are defined for that combination).
    """
    # Work out the variable names first so an unsupported combination fails
    # with a clear message instead of an unbound-variable error.
    if var_nums == 1:
        names = [headtype + '_2m' if headtype == "temperature" else headtype]
    elif var_nums == 2:
        if headtype == "wind":
            names = ['u10_' + headtype, 'v10_' + headtype]
        elif headtype == "current":
            names = ['u_' + headtype, 'v_' + headtype]
        else:
            raise ValueError(
                "no variable names defined for two-component type %r" % (headtype,))
    elif var_nums == 3:
        names = [headtype + '_height', headtype + '_direction', headtype + '_period']
    else:
        names = []

    ncfile = xr.Dataset()
    ncfile['time'] = (['time'], headtime)
    ncfile.time.attrs = {'long_name': '时间'}
    ncfile.time.encoding['units'] = "seconds since 1970-01-01 00:00:00"
    ncfile['xlat'] = (['xlat'], np.arange(headrowCount).astype(np.float32))
    ncfile.xlat.attrs = {'long_name': 'Latitude',
                         'axis': 'Y', 'units': "degrees_north"}
    ncfile['xlong'] = (['xlong'], np.arange(
        headcolumnCount).astype(np.float32))
    ncfile.xlong.attrs = {'long_name': 'Longitude',
                          'axis': 'X', 'units': "degrees_east"}
    ncfile['lat'] = (['xlat', 'xlong'], lat)
    ncfile.lat.attrs = {'long_name': 'Latitude', 'units': 'degrees_north'}
    ncfile['lon'] = (['xlat', 'xlong'], lon)
    ncfile.lon.attrs = {'long_name': 'Longitude', 'units': 'degrees_east'}

    # zip() stops at the shorter sequence, so only the first len(names)
    # arrays are consumed.
    for name, values in zip(names, (da1, da2, da3)):
        ncfile[name] = (['xlat', 'xlong'], values)

    ncfilename = os.path.basename(in_file) + '.nc'
    out_file = os.path.join(outputdir, ncfilename)
    ncfile.to_netcdf(out_file)
def decode_Dir(inputdir, outputdir):
    """Decode every file found in the per-product sub-folders of *inputdir*.

    For each known product name that exists as an entry under *inputdir*,
    every entry inside it is passed to decode_File() together with a fresh
    AwtHead whose ``type`` is that product name.
    """
    product_types = (
        "wind", "swell", "seas", "wave", "current", "vesselicing", "pressure",
        "height500mb", "sst", "visibility", "wxtype", "cloud", "temperature",
        "rhum",
    )
    for product in product_types:
        product_dir = os.path.join(inputdir, product)
        if not os.path.exists(product_dir):
            continue
        for entry in os.listdir(product_dir):
            header = AwtHead()
            header.type = product
            decode_File(os.path.join(product_dir, entry), header, outputdir)
import email
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import os
import time
import zipfile
import shutil
import subprocess
from ctypes import *
def init_browser():
    """Drive Chrome through the 163 webmail UI until the mail archive is downloaded.

    Steps: start Chrome through chromedriver, open https://email.163.com/,
    switch into the login iframe, type the account and password, click login,
    click through a fixed series of page elements, then block until
    ``Load_Path`` exists on disk. The browser is always closed before
    returning, including when a step fails.

    The account and password strings are blank placeholders and must be filled
    in before this can log in.

    NOTE(review): the final wait has no timeout; if the download never
    produces ``Load_Path`` this function blocks forever.
    """
    # NOTE(review): the driver path is passed positionally; whether the
    # installed Selenium version still accepts that should be confirmed.
    browser = webdriver.Chrome(r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
    try:
        browser.implicitly_wait(10)
        browser.get('https://email.163.com/')
        time.sleep(1)

        # The login form lives inside an iframe.
        iframe = browser.find_element(By.XPATH, '//div[@id="urs163Area"]/iframe')
        browser.switch_to.frame(iframe)
        browser.find_element(By.NAME, "email").send_keys('')  # mailbox account
        time.sleep(1)
        browser.find_element(By.NAME, "password").send_keys('')  # mailbox password
        time.sleep(1)
        browser.find_element(By.ID, 'dologin').click()
        time.sleep(3)

        #browser.find_element(By.ID, '_mail_component_147_147').click()
        # The element id differs between page loads, so try the alternative
        # id when the first one is missing. Catch Exception rather than
        # everything so Ctrl-C is not swallowed.
        try:
            browser.find_element(By.ID, '_mail_component_83_83').click()
        except Exception:
            browser.find_element(By.ID, '_mail_component_82_82').click()
        time.sleep(1)

        # presumably: select the listed mails, open the "more" menu, choose
        # the pack-and-download entry — TODO confirm against the live page
        browser.find_element(By.XPATH, '//div[@id="_dvModuleContainer_mbox.ListModule_0"]/header/div/div[1]').click()
        time.sleep(1)
        browser.find_element(By.XPATH, '//div[@id="_dvModuleContainer_mbox.ListModule_0"]/header/div/div[4]/div[3]').click()
        time.sleep(1)
        browser.find_element(By.XPATH, '//div[@style="visibility: visible; left: 528px; top: 124px;"]/div[1]').click()
        time.sleep(10)

        # Poll once per second until the downloaded archive shows up.
        while True:
            time.sleep(1)
            if os.path.exists(Load_Path):
                break
    finally:
        # Without this every scheduled run leaves a browser session behind.
        browser.quit()
    return
def unzip(spath,dpath):
    """Extract archive *spath* into *dpath*, repair GBK file names, then delete *spath*.

    Entry names stored without the ZIP UTF-8 flag are decoded as CP437 by
    ``zipfile``. For those entries the name is re-encoded to its raw bytes and
    decoded as GBK before extraction. Entries flagged as UTF-8, and names that
    are not valid GBK, are extracted under the name ``zipfile`` reports.

    Args:
        spath: path of the zip archive; removed after successful extraction.
        dpath: destination folder.
    """
    utf8_name_flag = 0x800  # general-purpose bit 11: name is stored as UTF-8
    with zipfile.ZipFile(spath, 'r') as zip_file:
        for info in zip_file.infolist():
            if not info.flag_bits & utf8_name_flag:
                try:
                    # Change the target name up front; extract() then writes
                    # straight to the right place, so no rename is needed.
                    info.filename = info.filename.encode('cp437').decode('gbk')
                except (UnicodeEncodeError, UnicodeDecodeError):
                    pass  # not GBK after all: keep the reported name
            zip_file.extract(info, dpath)
    # The archive is closed by the with-block before it is deleted.
    os.remove(spath)
    return
# Disabled ctypes-based variant of CdllRun, kept only as an inert string
# literal (it is never executed); the subprocess-based CdllRun defined after
# it is the one in use.
'''
rkwdll = CDLL('E:\\BVS\\4-orther\\BVS7\\bin\\decode.dll')
def CdllRun(filePath,dirPath):
filePath = bytes(filePath, 'utf-8')# 可见光源
dirPath = bytes(dirPath, 'utf-8')# 可见光源
rkwdll.decode.restype = c_bool
rkwdll.decode.argtyps = (c_char_p,c_char_p)
print(3)
if rkwdll.decode(filePath,dirPath) == False:
print("解析文件失败")
time.sleep(5)
return
'''
def CdllRun(filePath,dirPath):
    """Run the external ``decode.exe`` once for *filePath* and wait for it to exit.

    The program is invoked as ``decode.exe <dirPath> <filePath>``.

    Args:
        filePath: path of the attachment to decode.
        dirPath: directory passed as the first argument (presumably where the
            decoder writes its output — TODO confirm against decode.exe).
    """
    # Backslashes are doubled exactly as before; presumably decode.exe expects
    # escaped separators — TODO confirm.
    command = [
        "decode.exe",
        dirPath.replace("\\", "\\\\"),
        filePath.replace("\\", "\\\\"),
    ]
    # One invocation only, as an argument list so paths containing spaces
    # survive; call() blocks until the decoder has exited.
    return_code = subprocess.call(command)
    if return_code != 0:
        print("decode.exe exited with code %d for %s" % (return_code, filePath))
def Get_Annex_Message(FilePath, Annex_Path):
    """Save every named attachment of the e-mail file *FilePath* into *Annex_Path*.

    The file is parsed with the ``email`` package whatever its extension.
    Non-multipart parts that carry a file name are written out under that
    name with any directory components removed, so an attachment cannot be
    written outside *Annex_Path*. Parts without a decodable payload are
    skipped. Any error is printed and swallowed, so one bad file does not
    stop a batch.

    Args:
        FilePath: path of the candidate e-mail file.
        Annex_Path: existing folder the attachments are written to.
    """
    try:
        # Open any kind of file; the email parser decides what it can make of it.
        with open(FilePath, 'rb') as eml_file:
            msg = email.message_from_binary_file(eml_file)
        for part in msg.walk():  # visit every MIME part of the message
            if part.get_content_maintype() == 'multipart':
                continue
            Annex_name = part.get_filename()
            if not Annex_name:  # only parts that have a name are attachments
                continue
            # The name comes from the e-mail, i.e. from outside: keep only the
            # last path component, whichever separator was used.
            safe_name = os.path.basename(Annex_name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            with open(os.path.join(Annex_Path, safe_name), 'wb') as out_file:
                out_file.write(payload)
    except Exception as e:
        print(e)
    return
# Process every file below a folder, sub-folders included
def List_Filepath(Eml_Path, Annex_Path):
    """Run Get_Annex_Message() on every file under *Eml_Path*.

    ``os.walk`` already descends into sub-folders, so no explicit recursion
    is needed (recursing with a bare directory name would resolve it against
    the current working directory instead of its parent).

    Args:
        Eml_Path: root folder to scan.
        Annex_Path: folder passed through to Get_Annex_Message().
    """
    for parent, _dirnames, filenames in os.walk(Eml_Path):
        for filename in filenames:
            FilePath = os.path.join(parent, filename)
            Get_Annex_Message(FilePath, Annex_Path)
# Create the destination folder
def Create_Dir(Annex_Path):
    """Create folder *Annex_Path* unless something already exists there.

    Only the last path component is created (``os.mkdir``). When the path
    already exists a notice is printed instead.
    """
    if not os.path.exists(Annex_Path):
        os.mkdir(Annex_Path)
        return
    print("dir exists, Annex file will create in %s" % Annex_Path)
Create_Dir(NC_Path) # make sure the output root folder exists
# Time windows (local wall-clock time) in which a download run is triggered
time_4 = Interval("06:05:30", "06:06:00")
# time_5 = Interval("05:35:30", "05:36:00")
time_10 = Interval("12:05:30", "12:06:00")
#time_11 = Interval("11:35:30", "11:36:00")
time_16 = Interval("18:05:30", "18:06:00")
#time_17 = Interval("17:35:30", "17:36:00")
time_22 = Interval("00:05:30", "00:06:00")
# NOTE(review): the next line rebinds time_22, so the 00:05:30-00:06:00 window
# assigned just above is never used and only 12:48:30-12:58:00 is in effect.
# This looks like a leftover test window — confirm which one is intended.
time_22 = Interval("12:48:30", "12:58:00")
# Main scheduler: wait for a trigger window, tidy the folder names on the NAS,
# download and unpack the mail archive, decode every attachment, copy the
# results to the NAS, then tidy the folder names again. Runs forever.
# NOTE(review): the original indentation of this script was lost; the nesting
# below is reconstructed from the statements themselves (e.g. `continue` must
# sit inside the per-attachment loop) and should be confirmed, in particular
# the level of the "copy to nas finish" and "end" prints.
while 1:
    while 1:
        # current time; the clock is polled every 20 seconds
        time.sleep(20)
        now_localtime = time.strftime("%H:%M:%S", time.localtime())
        # current time expressed as a zero-length interval
        now_time = Interval(now_localtime, now_localtime)
        # approach 2: leave the wait loop once inside any trigger window
        if now_time in time_4 or now_time in time_10 or now_time in time_16 or now_time in time_22:
            break
    # Gang Li
    root_path_lg = 'Z:\\weather\\212-data\\BVS2\\'
    list_lg = os.listdir(root_path_lg)
    # print(list_lg)
    # Rename entries containing '_1823' to '_2023'; when the renamed entry
    # already exists, the '_1823' one is deleted instead.
    for folder_lg in list_lg:
        if '_1823' in folder_lg:
            folder_lg_replace = folder_lg.replace('_1823', '_2023')
            print('replace:')
            print(folder_lg_replace)
            if not os.path.exists(root_path_lg + folder_lg_replace):
                shutil.move(root_path_lg + folder_lg, root_path_lg + folder_lg_replace)
            else:
                shutil.rmtree(root_path_lg + folder_lg)
    # Snap a trailing hour suffix (_01.._05, _07.._11, ...) to the preceding
    # 6-hourly slot (_00, _06, _12, _18); when the target already exists the
    # off-slot entry is deleted instead.
    # NOTE(review): list_lg was read before the renames above, so an entry
    # renamed there is still listed here under its old name.
    # NOTE(review): str.replace substitutes every occurrence of the suffix
    # text in the name, not only the trailing one.
    for folder_lg in list_lg:
        ddir = folder_lg
        s_lg = folder_lg[-3:]
        if s_lg in ['_01', '_02', '_03', '_04', '_05']:
            ddir_lg = ddir.replace(s_lg, '_00')
        elif s_lg in ['_07', '_08', '_09', '_10', '_11']:
            ddir_lg = ddir.replace(s_lg, '_06')
        elif s_lg in ['_13', '_14', '_15', '_16', '_17']:
            ddir_lg = ddir.replace(s_lg, '_12')
        elif s_lg in ['_19', '_20', '_21', '_22', '_23']:
            ddir_lg = ddir.replace(s_lg, '_18')
        else:
            continue
        print(root_path_lg + ddir)
        print(root_path_lg + ddir_lg)
        if not os.path.exists(root_path_lg + ddir_lg):
            shutil.move(root_path_lg + ddir, root_path_lg + ddir_lg)
        else:
            shutil.rmtree(root_path_lg + ddir)
    print(now_localtime+"start download")
    # Start every run from empty working folders.
    # NOTE(review): rmtree raises if Annex_Path / Eml_Path do not exist yet
    # (e.g. on the very first run); only AWT_Path is guarded.
    shutil.rmtree(Annex_Path)
    os.mkdir(Annex_Path)
    if os.path.exists(AWT_Path):
        shutil.rmtree(AWT_Path)
    os.mkdir(AWT_Path)
    shutil.rmtree(Eml_Path)
    os.mkdir(Eml_Path)
    init_browser()
    print("init_browser finish")
    unzip(Load_Path, Eml_Path)
    print("unzip finish")
    List_Filepath(Eml_Path, Annex_Path)
    print("List_Filepath finish")
    pathDir = os.listdir(Annex_Path)
    for allDir in pathDir:
        # NOTE(review): '\%' is not a recognised escape sequence, so the
        # backslash is kept literally as the path separator; newer Python
        # versions warn about it. os.path.join with one argument is a no-op.
        sdir = os.path.join('%s\%s' % (Annex_Path, allDir))
        # decode directory: attachment name without its last four characters
        ddir = os.path.join('%s\%s' % (AWT_Path, allDir[0:len(allDir)-4]))
        # By Gang Li.
        print('*'*10)
        print(ddir)
        s_lg = ddir[-3:]
        print(s_lg)
        # Snap the hour suffix to its 6-hourly slot (s_lg is not updated, so
        # at most one of these branches changes ddir).
        if s_lg in ['_00','_01','_02','_03','_04','_05']:
            ddir = ddir.replace(s_lg, '_00')
        if s_lg in ['_06','_07','_08','_09','_10','_11']:
            ddir = ddir.replace(s_lg, '_06')
        if s_lg in ['_12','_13','_14','_15','_16','_17']:
            ddir = ddir.replace(s_lg, '_12')
        if s_lg in ['_18','_19','_20','_21','_22','_23']:
            ddir = ddir.replace(s_lg, '_18')
        # ddir = ddir.replace("_05", "_06")
        # ddir = ddir.replace("_01", "_00")
        # ddir = ddir.replace("_07", "_06")
        # ddir = ddir.replace("_13", "_12")
        # ddir = ddir.replace("_19", "_18")
        # Two attachments can map to the same slot; only the first is decoded.
        if os.path.exists(ddir):
            continue
        print('&'*100)
        print(ddir)
        # last two '_'-separated pieces of the path, concatenated
        Schedule = ddir.split('_')[-2] + ddir.split('_')[-1]
        # Report to the logging service before decoding (Status 2; presumably
        # "started" — confirm against the service).
        # NOTE(review): no timeout and no error handling on the POST; a
        # failure here raises and ends the whole scheduler.
        msg = {
            "Level": 0,
            "Host": 0,
            "Process": 15,
            "Schedule": Schedule,
            "Status": 2, # 16
            "Content": "BVS2 data"
        }
        url = "http://159.226.5.166:7894/api/log"
        msg_body = json.dumps(msg).encode(encoding='utf-8')
        result = requests.post(url, msg_body)
        print("Post data is ", result.status_code)
        Create_Dir(ddir)
        CdllRun(sdir, ddir)
        print(sdir+"decode finish")
        # Report again after decoding (Status 1; presumably "finished" —
        # confirm against the service).
        msg = {
            "Level": 0,
            "Host": 0,
            "Process": 15,
            "Schedule": Schedule,
            "Status": 1, # 16
            "Content": "BVS2 data"
        }
        url = "http://159.226.5.166:7894/api/log"
        msg_body = json.dumps(msg).encode(encoding='utf-8')
        result = requests.post(url, msg_body)
        print("Post data is ", result.status_code)
        # Destination on the NAS keeps the un-snapped name; the clean-up pass
        # at the end of the run renames it to its slot.
        outdir = os.path.join('%s\%s' % (NC_Path, allDir[0:len(allDir)-4]))
        # outdir = outdir.replace("_05", "_06")
        # outdir = outdir.replace("_01", "_00")
        # outdir = outdir.replace("_07", "_06")
        # outdir = outdir.replace("_13", "_12")
        # outdir = outdir.replace("_19", "_18")
        # An existing destination also skips the raw-attachment copy below.
        if os.path.exists(outdir):
            continue
        else:
            shutil.copytree(ddir, outdir)
        # Keep a copy of the raw attachment under <NC_Path>\rkw.
        # NOTE(review): that folder is not created anywhere in this script.
        rkwfile = os.path.join('%s\%s' % (os.path.join('%s\%s' % (NC_Path, "rkw")), allDir))
        if os.path.exists(rkwfile):
            continue
        shutil.copy(sdir, rkwfile)
    print("copy to nas finish")
    # Gang Li
    root_path_lg = 'Z:\\weather\\212-data\\BVS2\\'
    list_lg = os.listdir(root_path_lg)
    # print(list_lg)
    # for folder_lg in list_lg:
    #     if '_1823' in folder_lg:
    #         folder_lg_replace = folder_lg.replace('_1823', '_2023')
    #         print('replace:')
    #         print(folder_lg_replace)
    #         if not os.path.exists(root_path_lg + folder_lg_replace):
    #             shutil.move(root_path_lg + folder_lg, root_path_lg + folder_lg_replace)
    #         else:
    #             shutil.rmtree(root_path_lg + folder_lg)
    # Same slot-snapping pass as before the download, now also covering the
    # folders just copied; '_1823' is rewritten to '_2023' in the target name.
    for folder_lg in list_lg:
        ddir = folder_lg
        s_lg = folder_lg[-3:]
        if s_lg in ['_01', '_02', '_03', '_04', '_05']:
            ddir_lg = ddir.replace(s_lg, '_00')
        elif s_lg in ['_07', '_08', '_09', '_10', '_11']:
            ddir_lg = ddir.replace(s_lg, '_06')
        elif s_lg in ['_13', '_14', '_15', '_16', '_17']:
            ddir_lg = ddir.replace(s_lg, '_12')
        elif s_lg in ['_19', '_20', '_21', '_22', '_23']:
            ddir_lg = ddir.replace(s_lg, '_18')
        else:
            continue
        print(root_path_lg + ddir)
        print(root_path_lg + ddir_lg)
        if '_1823' in ddir_lg:
            ddir_lg = ddir_lg.replace('_1823', '_2023')
        if not os.path.exists(root_path_lg + ddir_lg):
            shutil.move(root_path_lg + ddir, root_path_lg + ddir_lg)
        else:
            shutil.rmtree(root_path_lg + ddir)
    print("end")
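# Scheduled scraping of mailbox attachments (using 163 Mail as the example)
# (web-page residue: "latest recommended article published 2023-09-01 17:04:34")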