#coding=UTF-8
import numpy as np
import torch
import time
height = 4000
width = 3000
height2 = 6000
width2 = 5000
a = np.random.random((height, width))    # (4000, 3000), float64
b = np.random.random((width, height2))   # (3000, 6000), float64
e = np.random.random((height2, width2))  # (6000, 5000), float64
time_start1 = time.time()
c = np.dot(a, b)  # (4000, 3000) x (3000, 6000) matmul on the CPU
time_end1 = time.time()
time_out1 = time_end1 - time_start1
print('cpu: time_out: ',time_out1)
time_start2 = time.time()
a1 = torch.from_numpy(a)
b1 = torch.from_numpy(b)
e1 = torch.from_numpy(e)
a1 = a1.cuda()  # copy from CPU to GPU
b1 = b1.cuda()  # copy from CPU to GPU
checked = False  # set to True to leave e1 on the CPU and watch the matmul below fail
if not checked:
    e1 = e1.cuda()  # copy e1 from CPU to GPU as well
torch.cuda.synchronize()  # wait for the copies to finish before starting the clock
time_start3 = time.time()
c1 = torch.matmul(a1, b1)
rest = torch.matmul(c1, e1)  # test whether data on the GPU can multiply data left on the CPU
torch.cuda.synchronize()  # matmul launches asynchronously; wait for the kernels before stopping the clock
time_end3 = time.time()
time_out3 = time_end3 - time_start3
print('gpu: time_out: ',time_out3)
c1 = c1.cpu()  # copy the result from the GPU back to the CPU
time_end2 = time.time()
time_out2 = time_end2 - time_start2
print('gpu total: time_out: ',time_out2)
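# Hedged illustration (not part of the timed run above): PyTorch refuses to
# multiply a GPU tensor by a CPU tensor rather than copying implicitly.
# e2 is a small throwaway CPU tensor introduced only for this demo.
e2 = torch.randn(4, 4, dtype=torch.float64)  # stays on the CPU
try:
    torch.matmul(a1[:4, :4], e2)  # a1 lives on the GPU, e2 on the CPU
except RuntimeError as err:
    print('mixed-device matmul failed: ', err)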
############## print a few results ##############
print('cpu: c: ', c[0, 0:5])    # NumPy result
print('gpu: c1: ', c1[0, 0:5])  # PyTorch result, computed on the GPU
print('cpu: c1: ', c1[0, 0:5])  # same tensor, now on the CPU after the copy above
Result:
cpu: time_out: 0.8587839603424072
gpu: time_out: 0.15541672706604004
gpu total: time_out: 2.954385280609131
cpu: c: [ 759.56254243 751.99849962 735.25157129 735.34527291 730.15037367]
gpu: c1: [ 759.56254243 751.99849962 735.25157129 735.34527291 730.15037367]
cpu: c1: [ 759.56254243 751.99849962 735.25157129 735.34527291 730.15037367]
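Note that the "gpu total" time is dominated by the host-to-device copies and one-time CUDA setup, not by the matmuls themselves. A minimal sketch of more precise kernel timing with torch.cuda.Event (standard PyTorch API; the sizes are reused from above purely for illustration):

import torch

x = torch.randn(4000, 3000, device='cuda')
y = torch.randn(3000, 6000, device='cuda')

torch.matmul(x, y)  # warm-up: the first call absorbs one-time setup cost

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)
start.record()            # enqueue a start marker on the CUDA stream
z = torch.matmul(x, y)
end.record()              # enqueue an end marker
torch.cuda.synchronize()  # wait until both markers have been passed
print('gpu matmul: %.3f ms' % start.elapsed_time(end))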
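One more caveat: np.random.random returns float64, so the GPU matmul above runs in double precision, which most consumer GPUs execute far more slowly than float32. A hedged sketch of the common cast-first pattern, reusing a and b from above:

a32 = torch.from_numpy(a).float().cuda()  # cast to float32, then copy to the GPU
b32 = torch.from_numpy(b).float().cuda()
c32 = torch.matmul(a32, b32)              # float32 matmul, usually much faster than float64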