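Generating the rowptr_map is extremely slow; see the line_profiler results below. Roughly two thirds of the time in from_coo (65.8 %) is spent in the per-element Python loop that counts the entries of each row (source lines 137-138), and most of the remainder (about 30 %) goes to rebuilding and summing the boolean block masks over the full coo.row / coo.col arrays for each of the 400 blocks (source lines 123-128).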
Wrote profile results to testing.py.lprof
Timer unit: 1e-06 s
Total time: 34.5321 s
File: /home/vetschn/src/python/qttools/src/qttools/datastructures/dbcsr.py
Function: from_coo at line 99
Line # Hits Time Per Hit % Time Line Contents
==============================================================
99 @profile
100 @classmethod
101 def from_coo(
102 cls,
103 coo: sps.coo_array,
104 block_sizes: np.ndarray,
105 stackshape: int | tuple,
106 ) -> "DBCSR":
107 """Converts a coo matrix to a DBCSR matrix."""
108 # Sort the data by block-row and -column
109 1 114.3 114.3 0.0 block_offsets = np.hstack(([0], np.cumsum(block_sizes)))
110 1 0.6 0.6 0.0 num_blocks = len(block_sizes)
111
112 1 0.2 0.2 0.0 rowptr_map = {}
113
114 1 0.5 0.5 0.0 if isinstance(stackshape, int):
115 1 0.2 0.2 0.0 stackshape = (stackshape,)
116
117 1 20.3 20.3 0.0 data = np.zeros(stackshape + (coo.nnz,), dtype=coo.data.dtype)
118 1 7.3 7.3 0.0 cols = np.zeros(coo.nnz, dtype=int)
119
120 1 0.2 0.2 0.0 offset = 0
121 401 2818.3 7.0 0.0 for i, j in np.ndindex(num_blocks, num_blocks):
122 400 88299.0 220.7 0.3 inds = (
123 1600 3474326.6 2171.5 10.1 (block_offsets[i] <= coo.row)
124 400 1751373.5 4378.4 5.1 & (coo.row < block_offsets[i + 1])
125 400 1479619.0 3699.0 4.3 & (block_offsets[j] <= coo.col)
126 400 1477859.1 3694.6 4.3 & (coo.col < block_offsets[j + 1])
127 )
128 400 2309967.2 5774.9 6.7 bnnz = np.sum(inds)
129
130 400 657.4 1.6 0.0 if bnnz == 0:
131 continue
132
133 400 677820.4 1694.6 2.0 data[..., offset : offset + bnnz] = coo.data[inds]
134 400 537833.3 1344.6 1.6 cols[offset : offset + bnnz] = coo.col[inds]
135
136 400 2443.4 6.1 0.0 rowptr = np.zeros(block_sizes[i] + 1, dtype=int)
137 10000400 3166637.5 0.3 9.2 for row in coo.row[inds] - block_offsets[i]:
138 10000000 19548554.5 2.0 56.6 rowptr[row + 1] += 1
139 400 12770.2 31.9 0.0 rowptr = np.cumsum(rowptr) + offset
140 400 570.7 1.4 0.0 rowptr_map[(i, j)] = rowptr
141
142 400 448.8 1.1 0.0 offset += bnnz
143
144 1 5.4 5.4 0.0 return cls(data, cols, rowptr_map, block_sizes)
Total time: 48.8996 s
File: testing.py
Function: main at line 8
Line # Hits Time Per Hit % Time Line Contents
==============================================================
8 @profile
9 def main():
10
11 1 4131500.0 4e+06 8.4 coo = sps.random(10000, 10000, density=0.1, format="coo")
12 # coo.data = np.arange(coo.nnz)
13
14 1 3211805.7 3e+06 6.6 inds = np.lexsort((coo.col, coo.row))
15 1 109128.4 109128.4 0.2 coo.row = coo.row[inds]
16 1 98638.5 98638.5 0.2 coo.col = coo.col[inds]
17
18 1 41348481.2 4e+07 84.6 dbcsr = DBCSR.from_coo(coo, [500] * 20, 10)
19 # eye = DBCSR.from_coo(eye, [5]*200, 100)