这个问题的原因是:pandas 2.0 及以上版本已经移除了 DataFrame.append 方法,不能再使用了。
我之前的程序是这样的:
def generate_train_val_test(args):
    """Window the train/test tables into samples and save train/val/test splits.

    The tables are read from ``args.train_path`` and ``args.test_path``
    (as pickles when the train path contains ``.pkl``, as CSV otherwise),
    stacked into one frame and passed to ``generate_graph_seq2seq_io_data``.
    The resulting arrays are written to ``train.npz``, ``val.npz`` and
    ``test.npz`` inside ``args.output_dir``.

    NOTE: this version calls ``DataFrame.append``, which pandas 2.0 removed,
    so it raises ``AttributeError`` on pandas >= 2.0.
    """
    load_as_pickle = '.pkl' in args.train_path
    if load_as_pickle:
        train = pd.DataFrame(pd.read_pickle(args.train_path))
        test = pd.DataFrame(pd.read_pickle(args.test_path))
    else:
        train = pd.read_csv(args.train_path)
        test = pd.read_csv(args.test_path)
    # Stack test rows after train rows and renumber the index from zero.
    df = train.append([test]).reset_index(drop=True)
    num_test = len(test)

    # Offset 0 is the latest observed sample; the input window reaches back
    # window_size - 1 steps from it.
    x_offsets = np.arange(1 - args.window_size, 1)
    # The target is the single step right after the window.
    y_offsets = np.arange(1, 2)

    x, y = generate_graph_seq2seq_io_data(
        df,
        x_offsets=x_offsets,
        y_offsets=y_offsets,
        add_time_in_day=False,
        add_day_in_week=False,
    )
    print("x shape: ", x.shape, ", y shape: ", y.shape)

    # The last num_test samples are the test split; the remaining samples
    # are divided into train (head) and validation (tail) by val_ratio.
    total = x.shape[0]
    non_test = total - num_test
    num_train = round(non_test * (1 - args.val_ratio))

    splits = [
        ("train", x[:num_train], y[:num_train]),
        ("val", x[num_train:non_test], y[num_train:non_test]),
        ("test", x[-num_test:], y[-num_test:]),
    ]

    # Each .npz file stores its samples plus the offsets as column vectors.
    for cat, part_x, part_y in splits:
        print(cat, "x: ", part_x.shape, "y:", part_y.shape)
        target = os.path.join(args.output_dir, "%s.npz" % cat)
        np.savez_compressed(
            target,
            x=part_x,
            y=part_y,
            x_offsets=x_offsets.reshape(list(x_offsets.shape) + [1]),
            y_offsets=y_offsets.reshape(list(y_offsets.shape) + [1]),
        )
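解决办法:用 `pd.concat` 函数来替代 `append` 方法。
`pd.concat` 函数用于将多个 DataFrame 沿着指定轴进行连接,例如把 `train.append([test]).reset_index(drop=True)` 改写为 `pd.concat([train, test], ignore_index=True)`。
下面是我替换之后修改的程序,运行成功了!!!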
def generate_train_val_test(args):
    """Window the train/test tables into samples and save train/val/test splits.

    Args:
        args: namespace providing ``train_path``, ``test_path``,
            ``window_size``, ``val_ratio`` and ``output_dir``.

    The tables are read as pickles when ``args.train_path`` contains
    ``.pkl`` and as CSV otherwise. They are concatenated (train first) and
    passed to ``generate_graph_seq2seq_io_data``. The last ``len(test)``
    samples become the test split; of the remaining samples the tail
    ``val_ratio`` fraction is validation and the head is training. Each
    split is written to ``<output_dir>/<split>.npz`` with keys ``x``, ``y``,
    ``x_offsets`` and ``y_offsets``.

    Raises:
        ValueError: if the test table has more rows than there are
            generated samples, which would make the split sizes negative.
    """
    # NOTE(security): unpickling can execute arbitrary code; only point
    # train_path / test_path at pickle files from a trusted source.
    if '.pkl' in args.train_path:
        train = pd.DataFrame(pd.read_pickle(args.train_path))
        test = pd.DataFrame(pd.read_pickle(args.test_path))
    else:
        train = pd.read_csv(args.train_path)
        test = pd.read_csv(args.test_path)
    # DataFrame.append was removed in pandas 2.0; pd.concat with
    # ignore_index=True stacks the rows and renumbers the index from zero.
    df = pd.concat([train, test], ignore_index=True)
    num_test = len(test)

    # Offset 0 is the latest observed sample; the input window reaches back
    # window_size - 1 steps from it. The target is the single next step.
    x_offsets = np.arange(1 - args.window_size, 1)
    y_offsets = np.arange(1, 2)

    x, y = generate_graph_seq2seq_io_data(
        df,
        x_offsets=x_offsets,
        y_offsets=y_offsets,
        add_time_in_day=False,
        add_day_in_week=False,
    )
    print("x shape: ", x.shape, ", y shape: ", y.shape)

    num_samples = x.shape[0]
    if num_test > num_samples:
        raise ValueError(
            "test set has %d rows but only %d samples were generated"
            % (num_test, num_samples)
        )

    # Index of the first test sample. Slicing from an explicit start rather
    # than with x[-num_test:] keeps the test split empty when num_test == 0
    # (x[-0:] would otherwise select every sample).
    test_start = num_samples - num_test
    num_train = round(test_start * (1 - args.val_ratio))

    splits = {
        "train": (x[:num_train], y[:num_train]),
        "val": (x[num_train:test_start], y[num_train:test_start]),
        "test": (x[test_start:], y[test_start:]),
    }

    # Offsets are stored with a trailing axis of length 1.
    x_offsets_col = x_offsets[..., np.newaxis]
    y_offsets_col = y_offsets[..., np.newaxis]

    for cat, (_x, _y) in splits.items():
        print(cat, "x: ", _x.shape, "y:", _y.shape)
        np.savez_compressed(
            os.path.join(args.output_dir, "%s.npz" % cat),
            x=_x,
            y=_y,
            x_offsets=x_offsets_col,
            y_offsets=y_offsets_col,
        )