Skip to content

Commit 89feb91

Browse files
committed
Initial implementation of COO / CSR sparse format
The key of this is the API, as shown with examples in test_sparse.f90. In particular, I am proposing the following functions / API: Higher level API: Conversion Dense <-> COO: * dense2coo * coo2dense Conversion COO -> CSR: * coo2csr CSR functionality: * csr_matvec * csr_getvalue Dense functionality: * getnnz Lower level API: * csr_has_canonical_format * csr_sum_duplicates * csr_sort_indices In these algorithms, it seems it is possible to just have one (best) implementation with one exception: sorting of indices (as implemented by `csr_sort_indices`), where different algorithms give different performance and it depends on the actual matrix. As such, it might make sense to provide a default in `csr_sort_indices` that is called by `coo2csr` that is as fast as we can make it for most cases (currently it uses quicksort, but there are other faster algoritms for our use case that we should switch over) and then provide other implementations such as `csr_sort_indices_mergesort`, `csr_sort_indices_timsort`, etc. for users so that they can choose the algorithm for sorting indices, or even provide their own. The file stdlib_experimental_sparse.f90 provides an example implementation of these algorithms that was inspired by SciPy. We can also discuss the details of that, but the key that I would like to discuss is the API itself.
1 parent ce987d2 commit 89feb91

File tree

6 files changed

+456
-0
lines changed

6 files changed

+456
-0
lines changed

src/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ set(SRC
3232
stdlib_experimental_error.f90
3333
stdlib_experimental_kinds.f90
3434
stdlib_experimental_system.F90
35+
stdlib_experimental_sparse.f90
3536
${outFiles}
3637
)
3738

src/Makefile.manual

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ SRC = f18estop.f90 \
88
stdlib_experimental_optval.f90 \
99
stdlib_experimental_quadrature.f90 \
1010
stdlib_experimental_quadrature_trapz.f90 \
11+
stdlib_experimental_sparse.f90 \
1112
stdlib_experimental_stats.f90 \
1213
stdlib_experimental_stats_mean.f90 \
1314
stdlib_experimental_stats_moment.f90 \

src/stdlib_experimental_sparse.f90

+335
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
module stdlib_experimental_sparse
2+
use stdlib_experimental_kinds, only: dp
3+
implicit none
4+
private
5+
public coo2dense, dense2coo, getnnz, coo2csr, coo2csc, &
6+
csr_has_canonical_format, csr_sum_duplicates, csr_sort_indices, &
7+
coo2csr_canonical, csr_matvec, csr_getvalue
8+
9+
contains
10+
11+
! Dense
12+
13+
subroutine dense2coo(B, Ai, Aj, Ax)
14+
real(dp), intent(in) :: B(:, :)
15+
integer, intent(out) :: Ai(:), Aj(:)
16+
real(dp), intent(out) :: Ax(:)
17+
integer :: i, j, idx
18+
idx = 1
19+
do j = 1, size(B, 2)
20+
do i = 1, size(B, 1)
21+
if (abs(B(i, j)) < tiny(1._dp)) cycle
22+
Ai(idx) = i
23+
Aj(idx) = j
24+
Ax(idx) = B(i, j)
25+
idx = idx + 1
26+
end do
27+
end do
28+
end subroutine
29+
30+
integer function getnnz(B) result(nnz)
31+
real(dp), intent(in) :: B(:, :)
32+
integer :: i, j
33+
nnz = 0
34+
do j = 1, size(B, 2)
35+
do i = 1, size(B, 1)
36+
if (abs(B(i, j)) < tiny(1._dp)) cycle
37+
nnz = nnz + 1
38+
end do
39+
end do
40+
end function
41+
42+
! COO
43+
44+
subroutine coo2dense(Ai, Aj, Ax, B)
45+
integer, intent(in) :: Ai(:), Aj(:)
46+
real(dp), intent(in) :: Ax(:)
47+
real(dp), intent(out) :: B(:, :)
48+
integer :: n
49+
B = 0
50+
do n = 1, size(Ai)
51+
B(Ai(n), Aj(n)) = B(Ai(n), Aj(n)) + Ax(n)
52+
end do
53+
end subroutine
54+
55+
subroutine coo2csr(Ai, Aj, Ax, Bp, Bj, Bx)
56+
! Converts from COO (Ai, Aj, Ax) into CSR (Bp, Bj, Bx)
57+
! Row and column indices are *not* assumed to be ordered.
58+
! Duplicate entries are carried over to the CSR representation.
59+
integer, intent(in) :: Ai(:), Aj(:)
60+
real(dp), intent(in) :: Ax(:)
61+
integer, intent(out) :: Bp(:), Bj(:)
62+
real(dp), intent(out) :: Bx(:)
63+
integer :: n, i, n_row, nnz, cumsum, temp, row, dest
64+
n_row = size(Bp)-1
65+
nnz = size(Ai)
66+
Bp = 0
67+
forall(n = 1:nnz) Bp(Ai(n)) = Bp(Ai(n)) + 1
68+
cumsum = 1
69+
do i = 1, n_row
70+
temp = Bp(i)
71+
Bp(i) = cumsum
72+
cumsum = cumsum + temp
73+
end do
74+
do n = 1, nnz
75+
row = Ai(n)
76+
dest = Bp(row)
77+
Bj(dest) = Aj(n)
78+
Bx(dest) = Ax(n)
79+
Bp(row) = Bp(row) + 1
80+
end do
81+
Bp(2:) = Bp(:n_row)
82+
Bp(1) = 1
83+
end subroutine
84+
85+
subroutine coo2csc(Ai, Aj, Ax, Bp, Bi, Bx)
86+
! Converts from COO (Ai, Aj, Ax) into CSC (Bp, Bi, Bx)
87+
! Row and column indices are *not* assumed to be ordered.
88+
! Duplicate entries are carried over to the CSC representation.
89+
integer, intent(in) :: Ai(:), Aj(:)
90+
real(dp), intent(in) :: Ax(:)
91+
integer, intent(out) :: Bp(:), Bi(:)
92+
real(dp), intent(out) :: Bx(:)
93+
! Calculate CSR of the transposed matrix:
94+
call coo2csr(Aj, Ai, Ax, Bp, Bi, Bx)
95+
end subroutine
96+
97+
subroutine coo2csr_canonical(Ai, Aj, Ax, Bp, Bj, Bx, verbose)
98+
! Converts from COO (Ai, Aj, Ax) into CSR (Bp, Bj, Bx)
99+
! Row and column indices are *not* assumed to be ordered.
100+
! Duplicate entries are summed up and the indices are ordered.
101+
integer, intent(in) :: Ai(:), Aj(:)
102+
real(dp), intent(in) :: Ax(:)
103+
integer, allocatable, intent(out) :: Bp(:), Bj(:)
104+
real(dp), allocatable, intent(out) :: Bx(:)
105+
logical, optional, intent(in) :: verbose
106+
integer :: Bj_(size(Ai))
107+
real(dp) :: Bx_(size(Ai))
108+
integer :: nnz
109+
logical :: verbose_
110+
verbose_ = .false.
111+
if (present(verbose)) verbose_ = verbose
112+
allocate(Bp(maxval(Ai)+1))
113+
if (verbose_) print *, "coo2csr"
114+
call coo2csr(Ai, Aj, Ax, Bp, Bj_, Bx_)
115+
if (verbose_) print *, "csr_sort_indices"
116+
call csr_sort_indices(Bp, Bj_, Bx_)
117+
if (verbose_) print *, "csr_sum_duplicates"
118+
call csr_sum_duplicates(Bp, Bj_, Bx_)
119+
if (verbose_) print *, "done"
120+
nnz = Bp(size(Bp))-1
121+
allocate(Bj(nnz), Bx(nnz))
122+
Bj = Bj_(:nnz)
123+
Bx = Bx_(:nnz)
124+
end subroutine
125+
126+
! CSR
127+
128+
logical function csr_has_canonical_format(Ap, Aj) result(r)
129+
! Determine whether the matrix structure is canonical CSR.
130+
! Canonical CSR implies that column indices within each row
131+
! are (1) sorted and (2) unique. Matrices that meet these
132+
! conditions facilitate faster matrix computations.
133+
integer, intent(in) :: Ap(:), Aj(:)
134+
integer :: i, j
135+
r = .false.
136+
do i = 1, size(Ap)-1
137+
if (Ap(i) > Ap(i+1)) return
138+
do j = Ap(i)+1, Ap(i+1)-1
139+
if (Aj(j-1) >= Aj(j)) return
140+
end do
141+
end do
142+
r = .true.
143+
end function
144+
145+
subroutine csr_sum_duplicates(Ap, Aj, Ax)
146+
! Sum together duplicate column entries in each row of CSR matrix A
147+
! The column indicies within each row must be in sorted order.
148+
! Explicit zeros are retained.
149+
! Ap, Aj, and Ax will be modified *inplace*
150+
integer, intent(inout) :: Ap(:), Aj(:)
151+
real(dp), intent(inout) :: Ax(:)
152+
integer :: nnz, r1, r2, i, j, jj
153+
real(dp) :: x
154+
nnz = 1
155+
r2 = 1
156+
do i = 1, size(Ap) - 1
157+
r1 = r2
158+
r2 = Ap(i+1)
159+
jj = r1
160+
do while (jj < r2)
161+
j = Aj(jj)
162+
x = Ax(jj)
163+
jj = jj + 1
164+
do while (jj < r2)
165+
if (Aj(jj) == j) then
166+
x = x + Ax(jj)
167+
jj = jj + 1
168+
else
169+
exit
170+
end if
171+
end do
172+
Aj(nnz) = j
173+
Ax(nnz) = x
174+
nnz = nnz + 1
175+
end do
176+
Ap(i+1) = nnz
177+
end do
178+
end subroutine
179+
180+
subroutine csr_sort_indices(Ap, Aj, Ax)
181+
! Sort CSR column indices inplace
182+
integer, intent(inout) :: Ap(:), Aj(:)
183+
real(dp), intent(inout) :: Ax(:)
184+
integer :: i, r1, r2, l, idx(size(Aj))
185+
do i = 1, size(Ap)-1
186+
r1 = Ap(i)
187+
r2 = Ap(i+1)-1
188+
l = r2-r1+1
189+
idx(:l) = iargsort_quicksort(Aj(r1:r2))
190+
Aj(r1:r2) = Aj(r1+idx(:l)-1)
191+
Ax(r1:r2) = Ax(r1+idx(:l)-1)
192+
end do
193+
end subroutine
194+
195+
function csr_matvec(Ap, Aj, Ax, x) result(y)
196+
! Compute y = A*x for CSR matrix A and dense vectors x, y
197+
integer, intent(in) :: Ap(:), Aj(:)
198+
real(dp), intent(in) :: Ax(:), x(:)
199+
real(dp) :: y(size(Ap)-1)
200+
integer :: i
201+
!$omp parallel default(none) shared(Ap, Aj, Ax, x, y) private(i)
202+
!$omp do
203+
do i = 1, size(Ap)-1
204+
y(i) = dot_product(Ax(Ap(i):Ap(i+1)-1), x(Aj(Ap(i):Ap(i+1)-1)))
205+
end do
206+
!$omp end do
207+
!$omp end parallel
208+
end function
209+
210+
integer function lower_bound(A, val) result(i)
211+
! Returns the lowest index "i" into the sorted array A so that A(i) >= val
212+
! It uses bisection.
213+
integer, intent(in) :: A(:), val
214+
integer :: l, idx
215+
if (A(1) >= val) then
216+
i = 1
217+
return
218+
end if
219+
if (A(size(A)) < val) then
220+
i = size(A)+1
221+
return
222+
end if
223+
l = 1
224+
i = size(A)
225+
! Now we always have A(l) < val; A(i) >= val and we must make sure that "i" is
226+
! the lowest possible such index.
227+
do while (l + 1 < i)
228+
idx = (l+i) / 2
229+
if (A(idx) < val) then
230+
l = idx
231+
else
232+
i = idx
233+
end if
234+
end do
235+
end function
236+
237+
238+
real(dp) function csr_getvalue(Ap, Aj, Ax, i, j) result(r)
239+
! Returns A(i, j) where the matrix A is given in the CSR format using
240+
! (Ap, Aj, Ax) triple. Assumes A to be in canonical CSR format.
241+
integer, intent(in) :: Ap(:), Aj(:)
242+
real(dp), intent(in) :: Ax(:)
243+
integer, intent(in) :: i, j
244+
integer :: row_start, row_end, offset
245+
row_start = Ap(i)
246+
row_end = Ap(i+1)-1
247+
offset = lower_bound(Aj(row_start:row_end), j) + row_start - 1
248+
if (offset <= row_end) then
249+
if (Aj(offset) == j) then
250+
r = Ax(offset)
251+
return
252+
end if
253+
end if
254+
r = 0
255+
end function
256+
257+
pure elemental subroutine swap_int(x,y)
258+
integer, intent(in out) :: x,y
259+
integer :: z
260+
z = x
261+
x = y
262+
y = z
263+
end subroutine
264+
265+
pure subroutine interchange_sort_map_int(vec,map)
266+
integer, intent(in out) :: vec(:)
267+
integer, intent(in out) :: map(:)
268+
integer :: i,j
269+
do i = 1,size(vec) - 1
270+
j = minloc(vec(i:),1)
271+
if (j > 1) then
272+
call swap_int(vec(i),vec(i + j - 1))
273+
call swap_int(map(i),map(i + j - 1))
274+
end if
275+
end do
276+
end subroutine
277+
278+
pure function iargsort_quicksort(vec_) result(map)
279+
integer, intent(in) :: vec_(:)
280+
integer :: map(size(vec_))
281+
integer, parameter :: levels = 300
282+
integer, parameter :: max_interchange_sort_size = 20
283+
integer :: i,left,right,l_bound(levels),u_bound(levels)
284+
integer :: pivot
285+
integer :: vec(size(vec_))
286+
287+
vec = vec_
288+
289+
forall(i=1:size(vec)) map(i) = i
290+
291+
l_bound(1) = 1
292+
u_bound(1) = size(vec)
293+
i = 1
294+
do while(i >= 1)
295+
left = l_bound(i)
296+
right = u_bound(i)
297+
if (right - left < max_interchange_sort_size) then
298+
if (left < right) call interchange_sort_map_int(vec(left:right),map(left:right))
299+
i = i - 1
300+
else
301+
pivot = (vec(left) + vec(right)) / 2
302+
left = left - 1
303+
right = right + 1
304+
do
305+
do
306+
left = left + 1
307+
if (vec(left) >= pivot) exit
308+
end do
309+
do
310+
right = right - 1
311+
if (vec(right) <= pivot) exit
312+
end do
313+
if (left < right) then
314+
call swap_int(vec(left),vec(right))
315+
call swap_int(map(left),map(right))
316+
elseif(left == right) then
317+
if (left == l_bound(i)) then
318+
left = left + 1
319+
else
320+
right = right - 1
321+
end if
322+
exit
323+
else
324+
exit
325+
end if
326+
end do
327+
u_bound(i + 1) = u_bound(i)
328+
l_bound(i + 1) = left
329+
u_bound(i) = right
330+
i = i + 1
331+
end if
332+
end do
333+
end function
334+
335+
end module

src/tests/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ add_subdirectory(ascii)
1010
add_subdirectory(io)
1111
add_subdirectory(linalg)
1212
add_subdirectory(optval)
13+
add_subdirectory(sparse)
1314
add_subdirectory(stats)
1415
add_subdirectory(system)
1516
add_subdirectory(quadrature)

src/tests/sparse/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ADDTEST(sparse)

0 commit comments

Comments
 (0)