nerf-pytorch/torchsearchsorted/examples/test.py

import torch
from torchsearchsorted import searchsorted, numpy_searchsorted
import time

if __name__ == '__main__':
    # defining the number of tests
    ntests = 2

    # defining the problem dimensions
    nrows_a = 50000
    nrows_v = 50000
    nsorted_values = 300
    nvalues = 1000

    # defines the variables. The first run will comprise allocation, the
    # further ones will not
    test_GPU = None
    test_CPU = None

    for ntest in range(ntests):
        print("\nLooking for %dx%d values in %dx%d entries" % (nrows_v, nvalues,
                                                             nrows_a,
                                                             nsorted_values))

        side = 'right'
        # generate a matrix with sorted rows
        a = torch.randn(nrows_a, nsorted_values, device='cpu')
        a = torch.sort(a, dim=1)[0]
        # generate a matrix of values to searchsort
        v = torch.randn(nrows_v, nvalues, device='cpu')

        # a = torch.tensor([[0., 1.]])
        # v = torch.tensor([[1.]])

        t0 = time.time()
        test_NP = torch.tensor(numpy_searchsorted(a, v, side))
        print('NUMPY:  searchsorted in %0.3fms' % (1000*(time.time()-t0)))
        t0 = time.time()
        test_CPU = searchsorted(a, v, test_CPU, side)
        print('CPU:  searchsorted in %0.3fms' % (1000*(time.time()-t0)))
        # compute the difference between both
        error_CPU = torch.norm(test_NP.double()
                               - test_CPU.double()).numpy()
        if error_CPU:
            import ipdb; ipdb.set_trace()
        print('    difference between CPU and NUMPY: %0.3f' % error_CPU)

        if not torch.cuda.is_available():
            print('CUDA is not available on this machine, cannot go further.')
            continue
        else:
            # now do the CPU
            a = a.to('cuda')
            v = v.to('cuda')
            torch.cuda.synchronize()
            # launch searchsorted on those
            t0 = time.time()
            test_GPU = searchsorted(a, v, test_GPU, side)
            torch.cuda.synchronize()
            print('GPU:  searchsorted in %0.3fms' % (1000*(time.time()-t0)))

            # compute the difference between both
            error_CUDA = torch.norm(test_NP.to('cuda').double()
                               - test_GPU.double()).cpu().numpy()

            print('    difference between GPU and NUMPY: %0.3f' % error_CUDA)
v0.1 release to public 4 years ago			`import torch`
			`from torchsearchsorted import searchsorted, numpy_searchsorted`
			`import time`

			`if __name__ == '__main__':`
			`# defining the number of tests`
			`ntests = 2`

			`# defining the problem dimensions`
			`nrows_a = 50000`
			`nrows_v = 50000`
			`nsorted_values = 300`
			`nvalues = 1000`

			`# defines the variables. The first run will comprise allocation, the`
			`# further ones will not`
			`test_GPU = None`
			`test_CPU = None`

			`for ntest in range(ntests):`
			`print("\nLooking for %dx%d values in %dx%d entries" % (nrows_v, nvalues,`
			`nrows_a,`
			`nsorted_values))`

			`side = 'right'`
			`# generate a matrix with sorted rows`
			`a = torch.randn(nrows_a, nsorted_values, device='cpu')`
			`a = torch.sort(a, dim=1)[0]`
			`# generate a matrix of values to searchsort`
			`v = torch.randn(nrows_v, nvalues, device='cpu')`

			`# a = torch.tensor([[0., 1.]])`
			`# v = torch.tensor([[1.]])`

			`t0 = time.time()`
			`test_NP = torch.tensor(numpy_searchsorted(a, v, side))`
			`print('NUMPY: searchsorted in %0.3fms' % (1000*(time.time()-t0)))`
			`t0 = time.time()`
			`test_CPU = searchsorted(a, v, test_CPU, side)`
			`print('CPU: searchsorted in %0.3fms' % (1000*(time.time()-t0)))`
			`# compute the difference between both`
			`error_CPU = torch.norm(test_NP.double()`
			`- test_CPU.double()).numpy()`
			`if error_CPU:`
			`import ipdb; ipdb.set_trace()`
			`print(' difference between CPU and NUMPY: %0.3f' % error_CPU)`

			`if not torch.cuda.is_available():`
			`print('CUDA is not available on this machine, cannot go further.')`
			`continue`
			`else:`
			`# now do the CPU`
			`a = a.to('cuda')`
			`v = v.to('cuda')`
			`torch.cuda.synchronize()`
			`# launch searchsorted on those`
			`t0 = time.time()`
			`test_GPU = searchsorted(a, v, test_GPU, side)`
			`torch.cuda.synchronize()`
			`print('GPU: searchsorted in %0.3fms' % (1000*(time.time()-t0)))`

			`# compute the difference between both`
			`error_CUDA = torch.norm(test_NP.to('cuda').double()`
			`- test_GPU.double()).cpu().numpy()`

			`print(' difference between GPU and NUMPY: %0.3f' % error_CUDA)`