YDOOK-MediAI-制作LSTM数据集-CSDN博客

本文链接：https://blog.csdn.net/FaQianApp/article/details/147478020

Summary

Date : 2025-04-24 14:29:49

Directory h:\Blog Test

Total : 1 files, 36 codes, 3 comments, 18 blanks, all 57 lines

Summary / Details / Diff Summary / Diff Details

Languages

language	files	code	comment	blank	total
Python	1	36	3	18	57

Directories

path	files	code	comment	blank	total
.	1	36	3	18	57

代码

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn

# Load the weather dataset from an Excel workbook into a DataFrame.
file_path = './archive/weather_prediction_dataset.xlsx'  # replace with the path to your Excel file
sheet_name = 'Sheet1'  # replace if the data is on a different worksheet

df = pd.read_excel(file_path, sheet_name=sheet_name)

print(f'{df = }')


# Choose the input feature columns and the target column.
# Double brackets keep both selections as 2-D DataFrames.
features = df[['MONTH', 'BASEL_cloud_cover']]  # input feature columns; adjust the names to your data
targets = df[['BASEL_humidity']]  # target column; adjust the name to your data

# Normalize the data. Separate scaler instances are fitted for the features
# and for the targets.
# NOTE(review): both scalers are fitted on every row; if a train/test split is
# added later, consider fitting on the training rows only -- confirm intent.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

scaled_features = scaler_x.fit_transform(features)
scaled_targets = scaler_y.fit_transform(targets)


def create_dataset(X, y, time_steps=1):
    """Build sliding-window samples for sequence models such as an LSTM.

    Sample ``i`` pairs the window ``X[i:i + time_steps]`` with the target
    ``y[i + time_steps]``, i.e. the target of the step that immediately
    follows the window.

    Args:
        X: Array-like of per-step features, indexed by time along axis 0.
        y: Array-like of per-step targets, indexed by time along axis 0.
            Must have at least ``len(X)`` rows.
        time_steps: Number of consecutive steps in each window (>= 1).

    Returns:
        A ``(Xs, ys)`` tuple of NumPy arrays. ``Xs`` has shape
        ``(len(X) - time_steps, time_steps, *X.shape[1:])`` and ``ys`` has
        shape ``(len(X) - time_steps, *y.shape[1:])``. When the series is too
        short to form a single window, both arrays have zero rows but keep
        their trailing dimensions.

    Raises:
        ValueError: If ``time_steps`` is smaller than 1, or if ``y`` has
            fewer rows than ``X``.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # A non-positive window would give empty windows (0) or wrap-around
    # target indices (negative), so reject it up front.
    if time_steps < 1:
        raise ValueError(f'time_steps must be >= 1, got {time_steps}')
    # Fail early instead of hitting an IndexError part-way through.
    if len(y) < len(X):
        raise ValueError(
            f'y has {len(y)} rows but X has {len(X)}; '
            'y needs at least one row per row of X'
        )

    n_samples = len(X) - time_steps
    if n_samples <= 0:
        # Keep the trailing dimensions so downstream reshapes / tensor
        # conversions still see the expected rank.
        empty_windows = np.empty((0, time_steps) + X.shape[1:], dtype=X.dtype)
        empty_targets = np.empty((0,) + y.shape[1:], dtype=y.dtype)
        return empty_windows, empty_targets

    windows = np.stack([X[start:start + time_steps] for start in range(n_samples)])
    # The targets are one contiguous slice; copy so the result does not
    # alias the caller's array.
    next_step_targets = y[time_steps:time_steps + n_samples].copy()
    return windows, next_step_targets

# Build the windowed dataset and print it for inspection.
TIME_STEPS = 6  # use the previous 6 time steps as the input for each sample
x, y = create_dataset(scaled_features, scaled_targets, TIME_STEPS)


print(f'{x = }')
print(f'{y = }')

print(f'{type(x) = }')
print(f'{type(y) = }')

print(f'{x.shape = }')  # (n_samples, TIME_STEPS, n_features): each sample is 6 rows of 2 values
print(f'{y.shape = }')  # (n_samples, 1): each sample has a single target value

print(f'{x[0] = }')
print(f'{y[0] = }')

print(f'{x[1] = }')
print(f'{y[1] = }')

# Stop the script here. `raise SystemExit` is used instead of exit() because
# exit() is injected by the `site` module for interactive sessions and is not
# guaranteed to exist (e.g. under `python -S` or in embedded interpreters).
raise SystemExit

输出

df =           DATE  MONTH  BASEL_cloud_cover  BASEL_humidity  BASEL_pressure  ...  TOURS_global_radiation  TOURS_precipitation  TOURS_temp_mean  TOURS_temp_min  TOURS_temp_max
0     20000101      1                  8            0.89          1.0286  ...                    0.25                 0.04              8.5             7.2             9.8
1     20000102      1                  8            0.87          1.0318  ...                    0.17                 0.16              7.9             6.6             9.2
2     20000103      1                  5            0.81          1.0314  ...                    0.27                 0.00              8.1             6.6             9.6
3     20000104      1                  7            0.79          1.0262  ...                    0.11                 0.44              8.6             6.4            10.8
4     20000105      1                  5            0.90          1.0246  ...                    0.39                 0.04              8.0             6.4             9.5
...        ...    ...                ...             ...             ...  ...                     ...                  ...              ...             ...             ...
3649  20091228     12                  7            0.82          1.0084  ...                    0.22                 1.50              6.2             1.8            10.6
3650  20091229     12                  7            0.92          1.0028  ...                    0.24                 0.40             10.4             6.2            14.5
3651  20091230     12                  8            0.92          0.9979  ...                    0.24                 1.00             10.0             8.7            11.3
3652  20091231     12                  7            0.93          0.9958  ...                    0.58                 0.02              8.5             6.2            10.9
3653  20100101      1                  8            0.93          0.9965  ...                    0.11                 0.00              0.5            -0.7             1.8

[3654 rows x 165 columns]
x = array([[[0.   , 1.   ],
        [0.   , 1.   ],
        [0.   , 0.625],
        [0.   , 0.875],
        [0.   , 0.625],
        [0.   , 0.375]],

       [[0.   , 1.   ],
        [0.   , 0.625],
        [0.   , 0.875],
        [0.   , 0.625],
        [0.   , 0.375],
        [0.   , 1.   ]],

       [[0.   , 0.625],
        [0.   , 0.875],
        [0.   , 0.625],
        [0.   , 0.375],
        [0.   , 1.   ],
        [0.   , 0.5  ]],

       ...,

       [[1.   , 1.   ],
        [1.   , 0.75 ],
        [1.   , 0.5  ],
        [1.   , 0.875],
        [1.   , 0.875],
        [1.   , 0.875]],

       [[1.   , 0.75 ],
        [1.   , 0.5  ],
        [1.   , 0.875],
        [1.   , 0.875],
        [1.   , 0.875],
        [1.   , 1.   ]],

       [[1.   , 0.5  ],
        [1.   , 0.875],
        [1.   , 0.875],
        [1.   , 0.875],
        [1.   , 1.   ],
        [1.   , 0.875]]])
y = array([[0.76666667],
       [0.68333333],
       [0.83333333],
       ...,
       [0.9       ],
       [0.91666667],
       [0.91666667]])
type(x) = <class 'numpy.ndarray'>
type(y) = <class 'numpy.ndarray'>
x.shape = (3648, 6, 2)
y.shape = (3648, 1)
x[0] = array([[0.   , 1.   ],
       [0.   , 1.   ],
       [0.   , 0.625],
       [0.   , 0.875],
       [0.   , 0.625],
       [0.   , 0.375]])
y[0] = array([0.76666667])
x[1] = array([[0.   , 1.   ],
       [0.   , 0.625],
       [0.   , 0.875],
       [0.   , 0.625],
       [0.   , 0.375],
       [0.   , 1.   ]])
y[1] = array([0.68333333])