Coder Social home page Coder Social logo

crosspy's Introduction

crosspy's People

Contributors

bozhiyou avatar

Stargazers

Milos Gligoric avatar  avatar Hochan Lee avatar

Watchers

 avatar  avatar Kostas Georgiou avatar

crosspy's Issues

Slices of 1D arrays seem to be treated as 2D Arrays.

Slices of 1D arrays seem to be treated as 2D Arrays.

Error

Traceback (most recent call last):
  File "/work2/06081/wlruys/frontera/workspace/parla-experimental/minimal.py", line 38, in <module>
    main(T)
  File "/work2/06081/wlruys/frontera/workspace/parla-experimental/minimal.py", line 31, in main
    print("First Element: ", slicedA[0])
  File "/work2/06081/wlruys/frontera/mambaforge/envs/parla/lib/python3.10/site-packages/crosspy/core/ndarray.py", line 412, in __getitem__
    raise NotImplementedError("Only implemented for 1-D")
   

Minimal Example

# from parla import Parla, spawn, TaskSpace
import argparse
import cupy as cp
import crosspy as xp

parser = argparse.ArgumentParser()
parser.add_argument("-dev_config", type=str, default="devices_sample.YAML")
parser.add_argument("-num_gpus", type=int, default=2)
args = parser.parse_args()

def main(T):
    """Reproduce the reported failure: index element 0 of a sliced CrossPy array.

    Builds ``args.num_gpus`` CuPy int32 blocks (all allocated under
    ``cp.cuda.Device(0)``), wraps them in CrossPy arrays with ``dim=0``,
    slices the first element out and prints it.

    :param T: unused by this function; kept for the caller's signature.
    """
    # Per device size
    block_len = 3

    # Initialize the per-device CuPy blocks
    blocks_a, blocks_b = [], []
    for _ in range(args.num_gpus):
        with cp.cuda.Device(0):
            values = cp.random.randint(0, 100, size=block_len)
            values = values.astype(cp.int32)

            blocks_a.append(values)
            blocks_b.append(cp.zeros(block_len, dtype=cp.int32))

    xA = xp.array(blocks_a, dim=0)
    # The second array is constructed but its result is not kept.
    xp.array(blocks_b, dim=0)

    first_only = xA[slice(0, 1)]
    print("First Element: ", first_only[0])
    print("First Element on CPU: ", first_only[0].get(-1))



if __name__ == "__main__":
    T = None
    main(T)

Full Example Script

# from parla import Parla, spawn, TaskSpace
import argparse
import cupy as cp
import numpy as np
import crosspy as xp

parser = argparse.ArgumentParser()
parser.add_argument("-dev_config", type=str, default="devices_sample.YAML")
parser.add_argument("-num_gpus", type=int, default=2)
args = parser.parse_args()

np.random.seed(10)
cp.random.seed(10)

# TODO(wlr): Fuse this kernel
def partition_kernel(A, B, comp, pivot):
    """Partition ``A`` around ``pivot`` into ``B`` and return the split point.

    ``comp`` is overwritten with the mask ``A < pivot``. ``B`` receives the
    elements of ``A`` selected by the mask first, followed by the remaining
    elements; relative order inside each group follows ``A``.

    :param A: input array (read only here)
    :param B: output array, written in full
    :param comp: scratch mask buffer; the caller in this script passes a
        boolean array shaped like ``A``
    :param pivot: value compared against with ``<``
    :return: ``comp.sum()``, the number of elements placed in the left group
    """
    comp[:] = A < pivot
    n_left = comp.sum()
    B[:n_left] = A[comp]      # elements below the pivot
    B[n_left:] = A[~comp]     # everything else
    return n_left


def partition(xA, xB, pivot):
    """Run ``partition_kernel`` on every block pair of ``xA`` / ``xB``.

    :param xA: source array exposing its blocks through ``.values()``
    :param xB: destination array exposing its blocks through ``.values()``
    :param pivot: pivot value forwarded to ``partition_kernel``
    :return: host ``np.uint32`` array of length ``n_blocks + 1``; entry
        ``k + 1`` holds the split point returned for block ``k`` and entry 0
        stays zero
    """
    n_partitions = len(xA.values())
    mid = np.zeros(n_partitions + 1, dtype=np.uint32)

    block_pairs = zip(xA.values(), xB.values())
    for slot, (src, dst) in enumerate(block_pairs, start=1):
        with cp.cuda.Device(0):
            mask = cp.empty_like(src, dtype=cp.bool_)
            mid[slot] = partition_kernel(src, dst, mask, pivot)
    return mid


# TODO(wlr): Fuse this kernel, pack this better?
def scatter(splits, xA, xB):
    """
    Copy the per-block partitioned data in xB back into xA so that all "left"
    groups come first, followed by all "right" groups.

    :param splits: a host ndarray; splits[i+1] is used as the number of left
                   elements in partition i
    :param xA, xB: sliced crosspy arrays for the active element set
    :return: np.sum(splits), the total number of left elements
    """

    n_partitions = len(xA.values())

    # sizes[k+1] is the length of block k of xB; sizes[0] stays 0 so that the
    # cumulative sums below are exclusive prefix offsets.
    sizes = np.zeros(n_partitions + 1, dtype=np.uint32)
    for slot, block in enumerate(xB.values(), start=1):
        sizes[slot] = len(block)

    size_prefix = np.cumsum(sizes)
    left_prefix = np.cumsum(splits)

    # Right elements before each block, shifted past the whole left region.
    right_prefix = size_prefix - left_prefix
    local_split = np.sum(splits)

    right_prefix += local_split

    print("Size Prefix: ", size_prefix)
    print("Left Prefix: ", left_prefix)
    print("Right Prefix: ", right_prefix)

    for part in range(n_partitions):
        n_left = splits[part + 1]
        src_lo = size_prefix[part]
        src_hi = size_prefix[part + 1]
        left_lo, left_hi = left_prefix[part], left_prefix[part + 1]
        right_lo, right_hi = right_prefix[part], right_prefix[part + 1]

        print(f"Performing left copy from Partition {part}: ", (left_lo, left_hi), (src_lo, src_lo + n_left))

        # Write left
        if n_left > 0:
            xA[left_lo:left_hi] = xB[src_lo:src_lo + n_left]

        print(f"Performing right copy from Partition {part}: ", (right_lo, right_hi), (src_lo + n_left, src_hi))

        # Write right
        if (sizes[part + 1] - n_left) > 0:
            xA[right_lo:right_hi] = xB[src_lo + n_left:src_hi]

    return local_split

def quicksort(xA, xB, active_slice, T):
    """Recursively sort ``xA[active_slice]`` in place, using ``xB`` as scratch.

    A random pivot is read from the active range, every block is partitioned
    locally with ``partition`` and the results are merged back into ``xA``
    with ``scatter``; the two resulting sub-ranges are then sorted
    recursively.

    :param xA: CrossPy array being sorted
    :param xB: CrossPy scratch array; sliced with the same ``active_slice``
    :param active_slice: ``slice`` with integer ``start`` and ``stop``
        delimiting the range to sort
    :param T: passed through unchanged to the recursive calls
    :return: ``None``
    """
    start = int(active_slice.start)
    stop = int(active_slice.stop)

    # Base case: ranges of zero or one element are already sorted. Without
    # this guard an empty range reaches np.random.randint(0, 0), which raises
    # ValueError, and the recursion has no termination condition at all.
    if stop - start <= 1:
        return

    print("----------------------")
    print("Starting Partition on Slice: ", active_slice)
    n_partitions = len(xA.values())
    print("CrossPy has n_partitions: ", n_partitions)

    active_A = xA[active_slice]
    active_B = xB[active_slice]

    pivot_idx = np.random.randint(0, len(active_A))

    print("Active partition has shape: ", active_A.shape)
    print("Active partition has len: ", len(active_A))

    print("The chosen pivot index is: ", pivot_idx)
    pivot = int(active_A[pivot_idx].to(-1))

    print("The chosen pivot is: ", pivot)

    # local partition
    print("Performing local partition...")
    splits = partition(active_A, active_B, pivot)
    print("Found the following splits: ", splits)

    # If nothing is strictly below the pivot, the pivot is the minimum and the
    # right child would be the same range again (endless recursion when all
    # values are equal). The pivot has already been coerced to int, so
    # re-partition on ``pivot + 1``: the left group then holds exactly the
    # pivot-valued elements, which are in their final position.
    pivot_is_minimum = not splits.any()
    if pivot_is_minimum:
        splits = partition(active_A, active_B, pivot + 1)
        print("Found the following splits: ", splits)

    # Scatter to other partitions
    print("Performing local scatter...")
    local_split = int(scatter(splits, active_A, active_B))

    # form slices to pass to children
    left_slice = slice(start, start + local_split)
    right_slice = slice(start + local_split, stop)

    # Both children are strictly smaller than the active range: the left one
    # never contains the pivot, and the right one is skipped past at least one
    # element whenever the minimum-pivot path was taken.
    if not pivot_is_minimum:
        quicksort(xA, xB, left_slice, T)
    quicksort(xA, xB, right_slice, T)




def main(T):
    """Build a random CrossPy array and sort it with ``quicksort``.

    ``args.num_gpus`` CuPy int32 blocks are created (all under
    ``cp.cuda.Device(0)``) for the data array and for a zero-filled scratch
    array of the same layout.

    :param T: forwarded to ``quicksort``; not used here otherwise.
    """
    # Per device size
    block_len = 3

    # Initialize the per-device CuPy blocks
    blocks_a = []
    blocks_b = []
    for _ in range(args.num_gpus):
        with cp.cuda.Device(0):
            values = cp.random.randint(0, 100, size=block_len)
            blocks_a.append(values.astype(cp.int32))
            blocks_b.append(cp.zeros(block_len, dtype=cp.int32))

    xA = xp.array(blocks_a, dim=0)
    xB = xp.array(blocks_b, dim=0)

    print("Original Array: ", xA)
    whole_range = slice(0, len(xA))
    quicksort(xA, xB, whole_range, T)

    print("Sorted: ", xA)


if __name__ == "__main__":
    T = None
    main(T)
    

Copying a slice into a CrossPy array.

Question

There might be a bug in __setitem__, assuming I'm using it correctly.

When assigning a slice into an array, neither
A[a_start:a_end] = B[b_start:b_end] (where A and B are both CrossPy arrays and the two slices have the same length)
nor
A[a_start:a_end] = cupy_b[b_start:b_end] (where the two slices have the same length)

seems to work reliably.

Example

# from parla import Parla, spawn, TaskSpace
import argparse
import cupy as cp
import numpy as np
import crosspy as xp
from crosspy import cpu, gpu

parser = argparse.ArgumentParser()
parser.add_argument("-dev_config", type=str, default="devices_sample.YAML")
parser.add_argument("-num_gpus", type=int, default=2)
args = parser.parse_args()

np.random.seed(10)
cp.random.seed(10)

# TODO(wlr): Fuse this kernel


def partition_kernel(A, B, comp, pivot):
    """Split ``A`` around ``pivot``, writing the result into ``B``.

    The mask ``A < pivot`` is stored into ``comp``; elements selected by the
    mask are written to the front of ``B`` and the rest behind them.

    :param A: input array
    :param B: output array, fully overwritten
    :param comp: mask buffer that receives ``A < pivot`` (the caller in this
        script passes a boolean array shaped like ``A``)
    :param pivot: pivot value
    :return: ``comp.sum()``, i.e. how many elements went to the front
    """
    comp[:] = A < pivot
    below = comp.sum()
    B[:below] = A[comp]
    B[below:] = A[~comp]
    return below


def partition(xA, xB, pivot):
    """Partition each block of ``xA`` into the matching block of ``xB``.

    :param xA: source array whose blocks are obtained with ``.values()``
    :param xB: destination array whose blocks are obtained with ``.values()``
    :param pivot: pivot value handed to ``partition_kernel``
    :return: host ``np.uint32`` array with one more entry than there are
        blocks; entry ``k + 1`` is the kernel's result for block ``k``
    """
    n_partitions = len(xA.values())
    mid = np.zeros(n_partitions + 1, dtype=np.uint32)

    pairs = zip(xA.values(), xB.values())
    for slot, (block_in, block_out) in enumerate(pairs, start=1):
        with cp.cuda.Device(0):
            mask = cp.empty_like(block_in, dtype=cp.bool_)
            mid[slot] = partition_kernel(block_in, block_out, mask, pivot)
    return mid


# TODO(wlr): Fuse this kernel, pack this better?
def scatter(xA, xB, mid):
    """Copy the per-block partitioned data of ``xB`` back into ``xA``.

    After ``partition`` each block of ``xB`` holds its "left" elements
    followed by its "right" elements. This routine writes all left groups to
    the front of ``xA`` and all right groups behind them, printing the
    intermediate slices for debugging.

    :param xA: destination array (sliced to the active range)
    :param xB: source array (sliced to the active range)
    :param mid: host ndarray; ``mid[i+1]`` is used as the number of left
        elements in block ``i``
    :return: ``None``
    """

    # sizes[k+1] is the length of block k; sizes[0] stays 0 so the cumulative
    # sums below are exclusive prefix offsets.
    sizes = np.zeros(len(xA.values())+1, dtype=np.uint32)
    for i, array in enumerate(xB.values()):
        sizes[i+1] = len(array)

    size_prefix = np.cumsum(sizes)
    left_prefix = np.cumsum(mid)
    # Right elements preceding each block, shifted past the whole left region.
    right_prefix = size_prefix - left_prefix
    global_left = np.sum(mid)
    right_prefix += global_left
    print(size_prefix, left_prefix, right_prefix, mid, global_left)

    for i, array in enumerate(xB.values()):
        n_left = mid[i+1]
        src_start = size_prefix[i]          # first element of block i in xB
        src_split = src_start + n_left      # end of block i's left group
        src_stop = size_prefix[i+1]         # end of block i in xB

        # Write left
        print("left", left_prefix[i], left_prefix[i+1], n_left)
        if n_left > 0:
            print("A", xA[left_prefix[i]:left_prefix[i+1]],
                  len(xA[left_prefix[i]:left_prefix[i+1]]))
            print("B local", array[:n_left], len(array[:n_left]))
            print("B global", xB[src_start:src_split],
                  len(xB[src_start:src_split]))

            # QUESTION: How can I perform this copy?
            # The source is block i's left group only: it ends at
            # size_prefix[i] + mid[i+1], not size_prefix[i+1] + mid[i+1].
            xA[left_prefix[i]:left_prefix[i+1]] = xB[src_start:src_split]

        # Write right
        print("right", right_prefix[i], right_prefix[i+1])

        if (sizes[i+1] - n_left) > 0:
            print("A", xA[right_prefix[i]:right_prefix[i+1]],
                  len(xA[right_prefix[i]:right_prefix[i+1]]))
            print("B local", array[n_left:], len(array[n_left:]))
            print("B global", xB[src_split:src_stop],
                  len(xB[src_split:src_stop]))

            # QUESTION: How can I perform this copy?
            # The destination is the right region (right_prefix); writing to
            # left_prefix here would overwrite the left group copied above.
            xA[right_prefix[i]:right_prefix[i+1]] = xB[src_split:src_stop]

    if global_left > 0:
        print("Array left", xA[:global_left])
    if (len(xA) - global_left) > 0:
        print("Array right", xA[global_left:])


def quicksort(xA, xB, slice, T):
    """Perform one partition-and-scatter step on ``xA[slice]``.

    The last element of the active range is used as the pivot. This version
    does not recurse.

    :param xA: array to partition
    :param xB: scratch array, sliced with the same ``slice``
    :param slice: the active range (note: the name shadows the builtin)
    :param T: unused here
    """
    n_partitions = len(xA.values())

    active_A, active_B = xA[slice], xB[slice]

    N = len(active_A)
    last = active_A[N - 1]
    pivot = int(last.to(-1))

    print(N, n_partitions, pivot)

    # Partition every block locally, then merge the groups across blocks.
    block_splits = partition(active_A, active_B, pivot)
    scatter(active_A, active_B, block_splits)


def main(T):
    """Build the data and scratch arrays and run one ``quicksort`` step.

    ``args.num_gpus`` CuPy int32 blocks of length 5 are created under
    ``cp.cuda.Device(0)`` and wrapped with ``xp.array``; the first entry of
    each wrapper's ``.values()`` is then what gets passed to ``quicksort``.

    :param T: forwarded to ``quicksort``.
    """
    # Per device size
    block_len = 5

    # Initialize the per-device CuPy blocks
    blocks_a, blocks_b = [], []
    for _ in range(args.num_gpus):
        with cp.cuda.Device(0):
            values = cp.random.randint(0, 100, size=block_len)
            blocks_a.append(values.astype(cp.int32))
            blocks_b.append(cp.zeros(block_len, dtype=cp.int32))

    xA = xp.array(blocks_a)
    xB = xp.array(blocks_b)

    # Continue with the first stored value of each wrapper.
    xA = xA.values()[0]
    xB = xB.values()[0]

    full_range = slice(0, len(xA))
    quicksort(xA, xB, full_range, T)


if __name__ == "__main__":
    T = None
    main(T)

Interoperability with Parla Array type.

Overview

I can't seem to create a CrossPy array of PArrays and get consistent slicing support.
We need a way to return slices of PArrays as the underlying type on CrossPy arrays.
Do you have any advice on this?

Example - Output

Testing PArray Construction
Converted Parray:  {0: array([0, 1, 2, 3, 4]), -1: None}
New PArray:  {0: None, -1: array([0, 1, 2, 3, 4])}

Testing CrossPy Construction

Case 1: Partitioning Init w/ Wrapper
Partition Init:  array {((0, 5),): <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122a5f0>, ((5, 10),): <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122a8f0>}
Internals: 
Partition 0: {0: array([0, 1, 2, 3, 4]), -1: None} : <class 'parla.common.parray.core.PArray'>
   Parent: 140546112923120
Partition 1: {0: array([5, 6, 7, 8, 9]), -1: None} : <class 'parla.common.parray.core.PArray'>
   Parent: 140546112923888

Case 2: Cupy Init w/ Wrapper
Cupy Init:  array {((0, 10),): array {((0, 5),): array([0, 1, 2, 3, 4]), ((5, 10),): array([0, 1, 2, 3, 4])}}
Internals: 
//Not PArray type (this type of init doesn't work)
Partition 0: array {((0, 5),): array([0, 1, 2, 3, 4]), ((5, 10),): array([0, 1, 2, 3, 4])} : <class 'crosspy.core.ndarray.CrossPyArray'>
Failed!  'CrossPyArray' object has no attribute 'parent_ID'


Case 3: PArray Init w/o wrapper
PArray List:  [<parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122a1d0>, <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122aa70>]
Failed!  Can't create this 

Case 4: PArray Init w/ wrapper
PArray List:  [<parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122acb0>, <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122ae00>]
Failed!  Can't create this 

Testing Slicing

Test slicing with Case 1. (Partitioning Input)
Test  Slice: slice(0, 3, None)
Sliced:  array {((0, 3),): <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122a5f0>}
Internals: 
Partition 0: {0: array([0, 1, 2, 3, 4]), -1: None} : <class 'parla.common.parray.core.PArray'>

//The type is correct, but slicing across PArrays doesn't work. 
Test  Slice: slice(0, 7, None)
Failed!  can't convert negative value to uint64_t

Test slicing with Case 2. (Cupy Input)
Test  Slice: slice(0, 3, None)
Sliced:  array {((0, 3),): <parla.common.parray.memory.MultiDeviceBuffer object at 0x7fd37122a5f0>}
Internals: 
Partition 0: array {((0, 3),): array([0, 1, 2])} : <class 'crosspy.core.ndarray.CrossPyArray'>
Partition Partition 0 : [0 1 2] : <class 'cupy.ndarray'>

Test  Slice: slice(0, 7, None)
Sliced:  array {((0, 7),): array {((0, 5),): array([0, 1, 2, 3, 4]), ((5, 7),): array([0, 1])}}
Internals: 
Partition 0: array {((0, 5),): array([0, 1, 2, 3, 4]), ((5, 7),): array([0, 1])} : <class 'crosspy.core.ndarray.CrossPyArray'>

//These should be PArray
Partition Partition 0 : [0 1 2 3 4] : <class 'cupy.ndarray'>
Partition Partition 1 : [0 1] : <class 'cupy.ndarray'>

Example - Source

import numpy as np
import cupy as cp
import crosspy as xp
from crosspy import gpu
from parla import parray

# Wrapper handed to xp.array(..., wrapper=...) in the cases below.
as_parray = parray.asarray

# Two device arrays with identical contents (both are arange(5)).
cupy1 = cp.arange(5)
cupy2 = cp.arange(5)

print("Testing PArray Construction")
# Wrap an existing CuPy array.
convert_parray_test = as_parray(cupy1)
print("Converted Parray: ", convert_parray_test)

# Build a PArray from a host (NumPy) array.
new_parray_test = parray.array(np.arange(5))
print("New PArray: ", new_parray_test)
print()

print("Testing CrossPy Construction")
print()
# Case 1: let CrossPy partition range(10) over two placements, wrapping each
# part with as_parray. `A` is reused by the slicing tests further down.
print("Case 1: Partitioning Init w/ Wrapper")
try:
    A = xp.array(range(10), placement=[gpu(0), gpu(0)], wrapper=as_parray)
    print("Partition Init: ", A)
    print("Internals: ")
    for i, array in enumerate(A.values()):
        print(f"Partition {i}: {array} : {type(array)}")
        # parent_ID is read from each stored value.
        print(f"   Parent: {array.parent_ID}")


except Exception as e:
    print("Failed! ", e)

print()
# Case 2: pass pre-built CuPy arrays together with the wrapper. `B` is reused
# by the slicing tests further down.
print("Case 2: Cupy Init w/ Wrapper")
try:
    B = xp.array([cupy1, cupy2], wrapper=as_parray)
    print("Cupy Init: ", B)
    print("Internals: ")

    for i, array in enumerate(B.values()):
        print(f"Partition {i}: {array} : {type(array)}")
        print(f"   Parent: {array.parent_ID}")
except Exception as e:
    print("Failed! ", e)


# Case 3: build the CrossPy array from a list of PArrays, without a wrapper.
print()
print(
    "Case 3: PArray Init w/o wrapper (not needed I was just curious if this was valid)"
)
try:
    parray_list = parray.asarray_batch([cupy1, cupy2])
    print("PArray List: ", parray_list)

    C = xp.array(parray_list)
    print("PArray List Init: ", C)
except Exception as e:
    print("Failed! ", e)

# Case 4: build the CrossPy array from a list of PArrays, with the wrapper.
print()
print(
    "Case 4: PArray Init w/ wrapper (not needed I was just curious if this was valid)"
)
try:
    parray_list = parray.asarray_batch([cupy1, cupy2])
    print("PArray List: ", parray_list)

    D = xp.array(parray_list, wrapper=as_parray)
    # Print the array built in this case (D), not C from case 3.
    print("PArray List Init: ", D)
except Exception as e:
    print("Failed! ", e)


print()
print("Testing Slicing")

# Slice the array from Case 1 (`A`) within a single partition: [0, 3).
print()
print("Test slicing with Case 1. (Partitioning Input)")
try:
    s = slice(0, 3)
    print("Test  Slice:", s)
    sliced_A = A[s]
    print("Sliced: ", sliced_A)

    print("Internals: ")
    for i, array in enumerate(sliced_A.values()):
        # NOTE: this does not print what I expect; the value shown appears to
        # be the original, unsliced partition.
        print(f"Partition {i}: {array} : {type(array)}")
        # print(f"   Parent: {array.parent_ID}")

except Exception as e:
    print("Failed! ", e)

print()

# Slice `A` across the partition boundary: [0, 7).
try:
    s = slice(0, 7)
    print("Test  Slice:", s)
    sliced_A = A[s]
    print("Sliced: ", sliced_A)

    print("Internals: ")

    for i, array in enumerate(sliced_A.values()):
        print(f"Partition {i}: {array} : {type(array)}")
        # print(f"   Parent: {array.parent_ID}")

except Exception as e:
    print("Failed! ", e)


# Slice the array from Case 2 (`B`) within a single partition: [0, 3).
print()
print("Test slicing with Case 2. (Cupy Input)")
try:
    s = slice(0, 3)
    print("Test  Slice:", s)
    sliced_B = B[s]
    # Print the slice taken here (sliced_B), not sliced_A from the Case 1 test.
    print("Sliced: ", sliced_B)

    print("Internals: ")
    for i, array in enumerate(sliced_B.values()):
        print(f"Partition {i}: {array} : {type(array)}")
        # print(f"   Parent: {array.parent_ID}")
        for j, subarray in enumerate(array.values()):
            print(f"Partition Partition {j} : {subarray} : {type(subarray)}")

            # This should still be PArray type so automatic movement can be handled.
            # assert type(subarray) == type(new_parray_test)
            # print(f"   Parent: {array.parent_ID}")


except Exception as e:
    print("Failed! ", e)

print()

# Slice `B` across the boundary between its two inner arrays: [0, 7).
try:
    s = slice(0, 7)
    print("Test  Slice:", s)
    sliced_B = B[s]
    print("Sliced: ", sliced_B)

    print("Internals: ")
    for i, array in enumerate(sliced_B.values()):
        print(f"Partition {i}: {array} : {type(array)}")
        # Each stored value is itself iterated through .values() here.
        for j, subarray in enumerate(array.values()):
            print(f"Partition Partition {j} : {subarray} : {type(subarray)}")

            # This should still be PArray type so automatic movement can be handled.
            # assert type(subarray) == type(new_parray_test)
            # print(f"   Parent: {array.parent_ID}")


except Exception as e:
    print("Failed! ", e)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.