import numpy as np
from mlir_graphblas import MlirJitEngine
from mlir_graphblas.tests.jit_engine_test_utils import sparsify_array, densify_vector, GRAPHBLAS_PASSES
engine = MlirJitEngine()
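# The JIT engine compiles MLIR text with the given passes and exposes each
# compiled function as a method on the engine (e.g. engine.mat_to_vec below).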
mlir_text = """
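// Sparse tensor encodings: #CSR64 and #CSC64 are row- and column-ordered
// 2D layouts (one dense and one compressed dimension); #SparseVec64 is a
// compressed 1D vector. All use 64-bit pointers and indices.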
#CSR64 = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],
dimOrdering = affine_map<(i,j) -> (i,j)>,
pointerBitWidth = 64,
indexBitWidth = 64
}>
#CSC64 = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],
dimOrdering = affine_map<(i,j) -> (j,i)>,
pointerBitWidth = 64,
indexBitWidth = 64
}>
#SparseVec64 = #sparse_tensor.encoding<{
dimLevelType = [ "compressed" ],
pointerBitWidth = 64,
indexBitWidth = 64
}>
module {
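// Helpers for creating and resizing the output sparse vector.
// They are only declared here; their definitions live outside this module.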
func private @vector_empty(tensor<?x?xf64, #CSR64>, index) -> tensor<?xf64, #SparseVec64>
func private @vector_resize_dim(tensor<?xf64, #SparseVec64>, index, index) -> ()
func private @vector_resize_pointers(tensor<?xf64, #SparseVec64>, index, index) -> ()
func private @vector_resize_index(tensor<?xf64, #SparseVec64>, index, index) -> ()
func private @vector_resize_values(tensor<?xf64, #SparseVec64>, index) -> ()
func @mat_to_vec(%matrix: tensor<?x?xf64, #CSR64>) -> tensor<?xf64, #SparseVec64> {
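// Extract the explicitly stored diagonal elements of a CSR matrix
// into a compressed sparse vector.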
%c1_i1 = constant 1 : i1
%c0_f64 = constant 0.0 : f64
%c0 = constant 0 : index
%c1 = constant 1 : index
%c2 = constant 2 : index
%nrows = tensor.dim %matrix, %c0 : tensor<?x?xf64, #CSR64>
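// Raw CSR arrays of the input: row pointers, column indices, and values.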
%matrix_pointers = sparse_tensor.pointers %matrix, %c1 : tensor<?x?xf64, #CSR64> to memref<?xi64>
%matrix_indices = sparse_tensor.indices %matrix, %c1 : tensor<?x?xf64, #CSR64> to memref<?xi64>
%matrix_values = sparse_tensor.values %matrix : tensor<?x?xf64, #CSR64> to memref<?xf64>
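// Create the output vector and set its length to the number of matrix rows.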
%output = call @vector_empty(%matrix, %c1) : (tensor<?x?xf64, #CSR64>, index) -> tensor<?xf64, #SparseVec64>
call @vector_resize_dim(%output, %c0, %nrows) : (tensor<?xf64, #SparseVec64>, index, index) -> ()
// We make two passes over the matrix: one to find the output
// vector's nnz and one to fill in the output's indices and values.
// We have to know the nnz first in order to allocate space in the
// output vector correctly.
// Alternatively, we could do both in a single pass:
// 1) assume that the output nnz is some arbitrary size
// 2) resize the output accordingly
// 3) on each iteration,
//    - store the values and indices
//    - track the correct nnz
//    - if we reach the limit of whatever size the output
//      vector currently is, resize (e.g. double it)
// 4) resize the output vector down to the correct nnz
// (a Python sketch of this single-pass approach follows the module)
// It's unclear which approach is more performant, since that depends
// on how accurate the arbitrary initial-size guess is and on how
// aggressively we resize.
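// First pass: count the rows that contain a stored diagonal element;
// that count is the output vector's nnz.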
%output_nnz = scf.for %matrix_row_index = %c0 to %nrows
step %c1
iter_args(%num_diagonal_containing_rows = %c0) -> (index) {
%next_matrix_row_index = addi %matrix_row_index, %c1 : index
%first_ptr_64 = memref.load %matrix_pointers[%matrix_row_index] : memref<?xi64>
%second_ptr_64 = memref.load %matrix_pointers[%next_matrix_row_index] : memref<?xi64>
%first_ptr = index_cast %first_ptr_64 : i64 to index
%second_ptr = index_cast %second_ptr_64 : i64 to index
%matrix_row_index_i64 = index_cast %matrix_row_index : index to i64
%throw_away_ptr, %diagonal_not_found = scf.while (
%ptr = %first_ptr,
%diagonal_position_not_found = %c1_i1
) : (index, i1) -> (index, i1) {
%more_ptrs = std.cmpi "ult", %ptr, %second_ptr : index
%continue = std.and %more_ptrs, %diagonal_position_not_found : i1
scf.condition(%continue) %ptr, %diagonal_position_not_found : index, i1
} do {
^bb0(%current_ptr: index, %throw_away: i1):
%element_column_index_i64 = memref.load %matrix_indices[%current_ptr] : memref<?xi64>
%is_not_diagonal_position = std.cmpi "ne", %element_column_index_i64, %matrix_row_index_i64 : i64
%next_ptr = addi %current_ptr, %c1 : index
scf.yield %next_ptr, %is_not_diagonal_position : index, i1
}
%updated_num_diagonal_containing_rows = scf.if %diagonal_not_found -> (index) {
scf.yield %num_diagonal_containing_rows : index
} else {
%incremented_num_diagonal_containing_rows = addi %num_diagonal_containing_rows, %c1 : index
scf.yield %incremented_num_diagonal_containing_rows : index
}
scf.yield %updated_num_diagonal_containing_rows : index
}
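// Resize the output's arrays now that the nnz is known: a compressed
// vector has a pointers array of length 2 and indices/values arrays of
// length nnz. pointers[1] holds the nnz.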
call @vector_resize_pointers(%output, %c0, %c2) : (tensor<?xf64, #SparseVec64>, index, index) -> ()
call @vector_resize_index(%output, %c0, %output_nnz) : (tensor<?xf64, #SparseVec64>, index, index) -> ()
call @vector_resize_values(%output, %output_nnz) : (tensor<?xf64, #SparseVec64>, index) -> ()
%output_pointers = sparse_tensor.pointers %output, %c0 : tensor<?xf64, #SparseVec64> to memref<?xi64>
%output_nnz_i64 = index_cast %output_nnz : index to i64
memref.store %output_nnz_i64, %output_pointers[%c1] : memref<?xi64>
%output_indices = sparse_tensor.indices %output, %c0 : tensor<?xf64, #SparseVec64> to memref<?xi64>
%output_values = sparse_tensor.values %output : tensor<?xf64, #SparseVec64> to memref<?xf64>
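// Second pass: for each row with a stored diagonal element, write its
// value and row index into the output.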
scf.for %row_index = %c0 to %nrows
step %c1
iter_args(%output_values_position = %c0) -> (index) {
%first_ptr_64 = memref.load %matrix_pointers[%row_index] : memref<?xi64>
%first_ptr = index_cast %first_ptr_64 : i64 to index
%next_row_index = addi %row_index, %c1 : index
%second_ptr_64 = memref.load %matrix_pointers[%next_row_index] : memref<?xi64>
%second_ptr = index_cast %second_ptr_64 : i64 to index
// Instead of tracking both whether a diagonal value has been found and
// the value itself, we could just treat a diagonal value of zero (or
// whatever the missing value represents) as "not found". However, that
// would break on malformed sparse tensors that explicitly store the
// missing value in their values array.
%row_index_i64 = index_cast %row_index : index to i64
%throw_away_ptr, %diagonal_not_found, %diagonal_value = scf.while (
%ptr = %first_ptr,
%diagonal_position_not_found = %c1_i1,
%diagonal_value = %c0_f64 // dummy value
) : (index, i1, f64) -> (index, i1, f64) {
%more_ptrs = std.cmpi "ult", %ptr, %second_ptr : index
%continue = std.and %more_ptrs, %diagonal_position_not_found : i1
scf.condition(%continue) %ptr, %diagonal_position_not_found, %diagonal_value : index, i1, f64
} do {
^bb0(%current_ptr: index, %diagonal_position_not_found: i1, %previous_diagonal_value: f64):
%element_column_index_i64 = memref.load %matrix_indices[%current_ptr] : memref<?xi64>
%is_not_diagonal_position = std.cmpi "ne", %element_column_index_i64, %row_index_i64 : i64
%updated_diagonal_value = scf.if %is_not_diagonal_position -> (f64) {
scf.yield %c0_f64 : f64 // dummy value
} else {
%actual_diagonal_value = memref.load %matrix_values[%current_ptr] : memref<?xf64>
scf.yield %actual_diagonal_value : f64
}
%next_ptr = addi %current_ptr, %c1 : index
scf.yield %next_ptr, %is_not_diagonal_position, %updated_diagonal_value : index, i1, f64
}
%next_output_values_position = scf.if %diagonal_not_found -> (index) {
scf.yield %output_values_position : index
} else {
memref.store %diagonal_value, %output_values[%output_values_position] : memref<?xf64>
memref.store %row_index_i64, %output_indices[%output_values_position] : memref<?xi64>
%updated_output_values_position = addi %output_values_position, %c1 : index
scf.yield %updated_output_values_position : index
}
scf.yield %next_output_values_position : index
}
return %output : tensor<?xf64, #SparseVec64>
}
}
"""
engine.add(mlir_text, GRAPHBLAS_PASSES)
dense_input_tensor = np.array(
[
[1, 0, 0, 0],
[2, 0, 3, 4],
[0, 0, 5, 0],
[0, 0, 6, 7],
],
dtype=np.float64,
)
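# Convert the dense array to a sparse tensor with dimension 0 dense and
# dimension 1 compressed, i.e. the #CSR64 layout expected by mat_to_vec.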
input_tensor = sparsify_array(dense_input_tensor, [False, True])
sparse_ans = engine.mat_to_vec(input_tensor)
dense_ans = densify_vector(sparse_ans)
print()
print(f"dense_input_tensor \n{repr(dense_input_tensor)}")
print()
print(f"input_tensor.pointers[1] {repr(input_tensor.pointers[1])}")
print(f"input_tensor.indices[1] {repr(input_tensor.indices[1])}")
print(f"input_tensor.values {repr(input_tensor.values)}")
print()
print(f"sparse_ans.pointers[0] {repr(sparse_ans.pointers[0])}")
print(f"sparse_ans.indices[0] {repr(sparse_ans.indices[0])}")
print(f"sparse_ans.values {repr(sparse_ans.values)}")
print()
print(f"dense_ans {repr(dense_ans)}")
print()
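# Optional sanity check (a sketch): assuming densify_vector fills unstored
# entries with zero, the densified result should equal NumPy's dense diagonal.
expected_ans = np.diag(dense_input_tensor)
assert np.array_equal(dense_ans, expected_ans), f"{dense_ans!r} != {expected_ans!r}"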