Some utility code for dataset preparation

1. Batch-renaming image files

import os

src_dir = "F:\\dataset\\tomato_data\\20230417"          # source folder
dst_dir = "F:\\dataset\\tomato_data\\tomato_datasets"   # destination folder for renamed images

# Rename every file in the source folder, numbering from 543;
# the "img_" prefix can be replaced with any other label.
for i, filename in enumerate(os.listdir(src_dir)):
    original = os.path.join(src_dir, filename)
    new = os.path.join(dst_dir, "img_" + str(i + 543) + ".jpg")
    os.rename(original, new)
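A slightly safer variant, as a minimal sketch using the same hypothetical paths: it skips non-image files and zero-pads the index so the new names sort numerically. Note that os.rename only renames; it does not convert formats, so the original extension is kept here.

import os

src_dir = "F:\\dataset\\tomato_data\\20230417"
dst_dir = "F:\\dataset\\tomato_data\\tomato_datasets"

count = 543
for filename in sorted(os.listdir(src_dir)):
    ext = os.path.splitext(filename)[1].lower()
    if ext not in (".jpg", ".jpeg", ".png"):
        continue                                  # skip anything that is not an image
    new_name = "img_{:05d}{}".format(count, ext)  # e.g. img_00543.jpg
    os.rename(os.path.join(src_dir, filename), os.path.join(dst_dir, new_name))
    count += 1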

2. Compressing a single image (two methods: OpenCV and PIL)

import cv2

# OpenCV: re-encode as JPEG with quality 25 (range 0-100; lower means a smaller file).
img = cv2.imread("C:\\Users\\Yellow\\Desktop\\img_1.jpg", cv2.IMREAD_COLOR)
cv2.imwrite("C:\\Users\\Yellow\\Desktop\\img_1_12.jpg", img, [cv2.IMWRITE_JPEG_QUALITY, 25])

from PIL import Image

# PIL: re-encode with quality 75; subsampling=0 disables chroma subsampling.
img2 = Image.open("C:\\Users\\Yellow\\Desktop\\img_1.jpg")
img2.save("C:\\Users\\Yellow\\Desktop\\img_1_2.jpg", quality=75, subsampling=0, dpi=(300.0, 300.0))
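To confirm that re-encoding actually shrank the file, the sizes before and after can be compared with os.path.getsize; a quick check, using the same hypothetical paths as above:

import os

before = os.path.getsize("C:\\Users\\Yellow\\Desktop\\img_1.jpg")
after = os.path.getsize("C:\\Users\\Yellow\\Desktop\\img_1_12.jpg")
print("original: {} bytes, compressed: {} bytes ({:.0%} of original)".format(
    before, after, after / before))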

3. Batch-compressing images (PIL)


from PIL import Image
import os, shutil


# Batch image compression: compress every file under srcPath into dstPath,
# recursing into subfolders so the directory structure is preserved.
def compressImage(srcPath, dstPath):
    # Create the destination directory if it does not exist yet.
    if not os.path.exists(dstPath):
        os.makedirs(dstPath)

    for filename in os.listdir(srcPath):
        # Build the full source and destination paths.
        srcFile = os.path.join(srcPath, filename)
        dstFile = os.path.join(dstPath, filename)

        # Regular files are re-encoded at reduced quality.
        if os.path.isfile(srcFile):
            try:
                dImg = Image.open(srcFile)
                dImg.save(dstFile, quality=75, subsampling=0, dpi=(300.0, 300.0))
                print(dstFile + " done")
            except Exception:
                print(dstFile + " failed")

        # Subfolders are handled recursively.
        if os.path.isdir(srcFile):
            compressImage(srcFile, dstFile)


if __name__ == '__main__':
    # Move the images waiting to be compressed from ./prepare to ./finish.
    path = os.walk("./prepare")
    for root, dirs, files in path:
        for f in files:
            shutil.move(os.path.join(root, f), os.path.join('./finish', f))

    # Clear out any previously compressed images.
    path = os.walk("./compress")
    for root, dirs, files in path:
        for f in files:
            os.remove(os.path.join(root, f))

    # Compress everything in ./finish into ./compress.
    compressImage("./finish", "./compress")
# Create three folders next to this script before running it:
# prepare: images waiting to be compressed; compress: the compressed output;
# finish: the originals of images that have already been compressed.
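If the three folders have not been created yet (shutil.move fails when ./finish is missing), they can be set up programmatically; a minimal sketch:

import os

for d in ("./prepare", "./compress", "./finish"):
    os.makedirs(d, exist_ok=True)  # no error if the folder already exists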

References (the code above is adapted from these posts):

python 实现图像的无损压缩 (Charles.zhang, CSDN blog)

Python批量压缩图片 (wtyzky, CSDN blog)
