You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

67 lines
2.4 KiB
Python

import torch
from torchsearchsorted import searchsorted, numpy_searchsorted
import time
if __name__ == '__main__':
# defining the number of tests
ntests = 2
# defining the problem dimensions
nrows_a = 50000
nrows_v = 50000
nsorted_values = 300
nvalues = 1000
# defines the variables. The first run will comprise allocation, the
# further ones will not
test_GPU = None
test_CPU = None
for ntest in range(ntests):
print("\nLooking for %dx%d values in %dx%d entries" % (nrows_v, nvalues,
nrows_a,
nsorted_values))
side = 'right'
# generate a matrix with sorted rows
a = torch.randn(nrows_a, nsorted_values, device='cpu')
a = torch.sort(a, dim=1)[0]
# generate a matrix of values to searchsort
v = torch.randn(nrows_v, nvalues, device='cpu')
# a = torch.tensor([[0., 1.]])
# v = torch.tensor([[1.]])
t0 = time.time()
test_NP = torch.tensor(numpy_searchsorted(a, v, side))
print('NUMPY: searchsorted in %0.3fms' % (1000*(time.time()-t0)))
t0 = time.time()
test_CPU = searchsorted(a, v, test_CPU, side)
print('CPU: searchsorted in %0.3fms' % (1000*(time.time()-t0)))
# compute the difference between both
error_CPU = torch.norm(test_NP.double()
- test_CPU.double()).numpy()
if error_CPU:
import ipdb; ipdb.set_trace()
print(' difference between CPU and NUMPY: %0.3f' % error_CPU)
if not torch.cuda.is_available():
print('CUDA is not available on this machine, cannot go further.')
continue
else:
# now do the CPU
a = a.to('cuda')
v = v.to('cuda')
torch.cuda.synchronize()
# launch searchsorted on those
t0 = time.time()
test_GPU = searchsorted(a, v, test_GPU, side)
torch.cuda.synchronize()
print('GPU: searchsorted in %0.3fms' % (1000*(time.time()-t0)))
# compute the difference between both
error_CUDA = torch.norm(test_NP.to('cuda').double()
- test_GPU.double()).cpu().numpy()
print(' difference between GPU and NUMPY: %0.3f' % error_CUDA)