torch.compile test

import statistics
import time

import torch

class MyModule(torch.nn.Module):
    """Toy module: one linear layer (100 -> 10 features) followed by ReLU."""

    def __init__(self):
        super().__init__()
        # The attribute name `lin` becomes part of the state_dict keys.
        self.lin = torch.nn.Linear(100, 10)

    def forward(self, x):
        """Project ``x`` through the linear layer and zero out negatives."""
        projected = self.lin(x)
        return torch.relu(projected)

# Instantiate the toy module. The compile-and-run demo for it is left
# commented out, so `mod` is not referenced again in the visible script.
mod = MyModule()
# opt_mod = torch.compile(mod)
# print(opt_mod(torch.randn(10, 100)))

def timed(fn):
    """Run ``fn()`` and return ``(result, elapsed_seconds)``.

    When CUDA is available, the call is bracketed by CUDA events and followed
    by a device synchronization, so the measurement covers the GPU work that
    ``fn`` enqueued. When CUDA is not available, the function falls back to a
    wall-clock measurement with ``time.perf_counter()`` instead of failing.

    Args:
        fn: Zero-argument callable to execute and time.

    Returns:
        A tuple ``(result, seconds)`` where ``result`` is whatever ``fn()``
        returned and ``seconds`` is the elapsed time as a float.
    """
    if not torch.cuda.is_available():
        # CPU-only fallback: CUDA events cannot be created without a GPU.
        begin = time.perf_counter()
        result = fn()
        return result, time.perf_counter() - begin

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    # Both events must have completed before their elapsed time can be read.
    torch.cuda.synchronize()
    # elapsed_time() reports milliseconds; convert to seconds.
    return result, start.elapsed_time(end) / 1000

def generate_data(b, device="cuda"):
    """Generate one random batch of inputs and targets.

    Args:
        b: Batch size.
        device: Device the tensors are moved to. Defaults to ``"cuda"``,
            which matches the previous hard-coded ``.cuda()`` behavior; pass
            ``"cpu"`` to produce the batch without a GPU.

    Returns:
        A tuple ``(inputs, targets)``: ``inputs`` is a float32 tensor of
        shape ``(b, 3, 128, 128)`` drawn from ``torch.randn``; ``targets`` is
        a tensor of shape ``(b,)`` with integers in ``[0, 1000)`` drawn from
        ``torch.randint``.
    """
    # Both tensors are sampled on the CPU first (same order as before, so the
    # CPU random stream is consumed identically) and then moved.
    inputs = torch.randn(b, 3, 128, 128).to(torch.float32)
    targets = torch.randint(1000, (b,))
    return inputs.to(device), targets.to(device)

# Number of timed training steps run in each benchmark loop below.
N_ITERS = 10

from torchvision.models import resnet18
def init_model():
    """Build a ResNet-18 via ``resnet18()`` (default arguments), cast it to
    float32 and move it to the current CUDA device."""
    net = resnet18()
    net = net.to(torch.float32)
    return net.cuda()




# Model and optimizer used by the eager (uncompiled) timing loop. `opt` is a
# module-level global that train() reads by name rather than as an argument.
model = init_model()
opt = torch.optim.Adam(model.parameters())

def train(mod, data):
    """Run one optimization step of ``mod`` on a single batch.

    Args:
        mod: The model to call on the inputs.
        data: Indexable pair where ``data[0]`` is the model input and
            ``data[1]`` is the cross-entropy target.

    The optimizer is the module-level ``opt``; nothing is returned.
    """
    opt.zero_grad(set_to_none=True)
    inputs, targets = data[0], data[1]
    logits = mod(inputs)
    # Functional form of CrossEntropyLoss with its default settings.
    loss = torch.nn.functional.cross_entropy(logits, targets)
    loss.backward()
    opt.step()

# Eager baseline: time N_ITERS training steps of the uncompiled `train`,
# drawing a new random batch of 16 samples for every step.
eager_times = []
for i in range(N_ITERS):
    inp = generate_data(16)
    # timed() returns (result, seconds); train() has no result worth keeping.
    eager_time = timed(lambda: train(model, inp))[1]
    print(f"eager train time {i}: {eager_time}")
    eager_times.append(eager_time)
print("~" * 10)

# Start the compiled run from a newly initialized model and optimizer.
# Rebinding the global `opt` matters because train() looks it up by name.
model = init_model()
opt = torch.optim.Adam(model.parameters())
# Compile the whole training-step function (forward, backward and optimizer
# step all live inside train()).
train_opt = torch.compile(train, mode="reduce-overhead")

# Compiled run: time N_ITERS training steps through `train_opt`, drawing a
# new random batch of 16 samples for every step.
compile_times = []
for i in range(N_ITERS):
    inp = generate_data(16)
    # timed() returns (result, seconds); only the duration is kept.
    compile_time = timed(lambda: train_opt(model, inp))[1]
    print(f"compile train time {i}: {compile_time}")
    compile_times.append(compile_time)
print("~" * 10)

# Compare the two runs by their median step time. The median is used so that
# a few slow iterations (presumably the first compiled calls, which include
# compilation) do not dominate the result.
# statistics.median replaces np.median: `np` was never imported in this
# script, and both average the two middle values for an even-length list.
eager_med = statistics.median(eager_times)
compile_med = statistics.median(compile_times)
speedup = eager_med / compile_med
print(f"(train) eager median: {eager_med}, compile median: {compile_med}, speedup: {speedup}x")
print("~" * 10)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
def define_cnn_model():
    """Build and compile a small CNN with a single sigmoid output.

    The network stacks three Conv2D + MaxPool2D stages (32, 64 and 128
    filters), flattens, and ends with Dense(128) and Dense(1, sigmoid). It is
    compiled with SGD and binary cross-entropy and reports accuracy.
    """
    # Use the Sequential model.
    model = Sequential()
    # Convolutional layer. It is the first layer, so the shape of the incoming
    # images must be set; 3 is the number of color channels.
    model.add(Conv2D(32,(3,3),activation="relu",padding="same",input_shape=(200,200,3)))
    # Max-pooling layer (pooling window).
    model.add(MaxPool2D((2,2)))
    # NOTE(review): input_shape is repeated on this non-first layer, copied
    # from the first one; presumably it has no effect here - confirm.
    model.add(Conv2D(64,(3,3),activation="relu",padding="same",input_shape=(200,200,3)))
    # Max-pooling layer (pooling window).
    model.add(MaxPool2D((2,2)))
    # NOTE(review): same repeated input_shape as above - confirm.
    model.add(Conv2D(128,(3,3),activation="relu",padding="same",input_shape=(200,200,3)))
    # Max-pooling layer (pooling window).
    model.add(MaxPool2D((2,2)))
    # Flatten layer.
    model.add(Flatten())
    # Fully connected layers; 128 is the number of neurons.
    model.add(Dense(128,activation="relu"))
    model.add(Dense(1,activation="sigmoid"))
    # Compile the model with stochastic gradient descent.
    # NOTE(review): newer Keras releases name this argument `learning_rate`
    # instead of `lr` - confirm against the installed version.
    opt = SGD(lr= 0.001,momentum=0.9)
    model.compile(optimizer=opt,loss="binary_crossentropy",metrics=["accuracy"])
    return model


def train_cnn_model():
    """Train the CNN on images read from ``../Test1/Train`` and save it.

    The trained model is written to ``my_model.h5``; nothing is returned.
    """
    # Instantiate the model.
    model = define_cnn_model()
    # Create the image generator; pixel values are rescaled by 1/255.
    datagen = ImageDataGenerator(rescale=1.0/255.0)
    # batch_size: how many images are drawn at a time.
    # target_size: the size every image is rescaled to.
    train_it = datagen.flow_from_directory(
        r"../Test1/Train",
        class_mode="binary",
        batch_size=64,
        target_size=(200, 200))
    # Train the model.
    model.fit(train_it, steps_per_epoch=len(train_it), epochs=20, verbose=1)
    model.save("my_model.h5")


torch.cuda.set_device(0)
train_cnn_model()
# Request that accompanied this snippet on the page: "Plot the training
# process of the code above."
06-13

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值