14 #include "linalg/Matrix.h"
26 int num_f_basis_vectors_used,
27 std::vector<int>& f_sampled_row,
28 std::vector<int>& f_sampled_rows_per_proc,
29 Matrix& f_basis_sampled_inv,
32 const int num_samples_req)
34 CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
41 CAROM_VERIFY(num_f_basis_vectors_used == f_basis.
numColumns());
42 CAROM_VERIFY(f_basis.
numColumns() <= num_samples_req
44 CAROM_VERIFY(num_samples_req == f_basis_sampled_inv.
numRows()
46 CAROM_VERIFY(num_samples_req == f_sampled_row.size());
51 const int num_samples_req_QR = numCol;
53 std::vector<double> sampled_row_data;
54 if (myid == 0) sampled_row_data.resize(num_samples_req * numCol);
56 std::vector<int> f_sampled_row_owner((myid == 0
62 f_sampled_row_owner.data(),
72 std::vector<int> ns((myid == 0) ? num_procs : 0);
73 std::vector<int> disp((myid == 0) ? num_procs : 0);
74 std::vector<int> all_sampled_rows((myid == 0) ? num_samples_req : 0);
77 for (
int r=0; r<num_procs; ++r)
80 for (
int i=0; i<num_samples_req_QR; ++i)
81 ns[f_sampled_row_owner[i]]++;
84 for (
int r=1; r<num_procs; ++r)
85 disp[r] = disp[r-1] + ns[r-1];
87 CAROM_VERIFY(disp[num_procs-1] + ns[num_procs-1] == num_samples_req_QR);
89 for (
int r=0; r<num_procs; ++r)
91 f_sampled_rows_per_proc[r] = ns[r];
95 for (
int i=0; i<num_samples_req_QR; ++i)
97 const int owner = f_sampled_row_owner[i];
98 all_sampled_rows[disp[owner] + ns[owner]] = f_sampled_row[i];
104 for (
int i=0; i<num_samples_req_QR; ++i)
105 f_sampled_row[i] = all_sampled_rows[i];
108 for (
int r=0; r<num_procs; ++r)
110 for (
int i=0; i<f_sampled_rows_per_proc[r]; ++i)
112 f_sampled_row_owner[os + i] = r;
115 os += f_sampled_rows_per_proc[r];
119 MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
123 MPI_Scatter(ns.data(), 1, MPI_INT, &count, 1, MPI_INT, 0, MPI_COMM_WORLD);
125 std::vector<int> my_sampled_rows(count);
126 std::vector<double> my_sampled_row_data(count*numCol);
128 MPI_Scatterv(all_sampled_rows.data(), ns.data(), disp.data(), MPI_INT,
129 my_sampled_rows.data(), count, MPI_INT, 0, MPI_COMM_WORLD);
131 std::vector<int> row_offset(num_procs);
132 row_offset[myid] = f_basis.
numRows();
134 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, row_offset.data(), 1,
135 MPI_INT, MPI_COMM_WORLD) == MPI_SUCCESS);
138 for (
int i=0; i<num_procs; ++i)
141 row_offset[i] = os - row_offset[i];
144 for (
int i=0; i<count; ++i)
146 const int msr = my_sampled_rows[i];
147 const bool mycond = my_sampled_rows[i] >= row_offset[myid]
148 && my_sampled_rows[i] < row_offset[myid] + f_basis.
numRows();
149 CAROM_VERIFY(my_sampled_rows[i] >= row_offset[myid]
150 && my_sampled_rows[i] < row_offset[myid] + f_basis.
numRows());
151 const int row = my_sampled_rows[i] - row_offset[myid];
153 for (
int j=0; j<numCol; ++j)
154 my_sampled_row_data[os + j] = f_basis(row, j);
159 for (
int r=0; r<num_procs; ++r)
166 MPI_Gatherv(my_sampled_row_data.data(), count*numCol, MPI_DOUBLE,
167 sampled_row_data.data(),
168 ns.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
170 const int nf = f_basis.
numRows();
172 std::vector<int> rcnt(num_procs);
173 std::vector<int> rdsp(num_procs);
174 MPI_Gather(&nf, 1, MPI_INT, rcnt.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
180 for (
int i=0; i<num_procs; ++i)
183 for (
int i=1; i<num_procs; ++i)
184 rdsp[i] = rdsp[i-1] + rcnt[i-1];
187 std::set<int> globalSamples;
191 for (
int i=0; i<num_procs; ++i)
193 for (
int j=0; j<f_sampled_rows_per_proc[i]; ++j, ++count)
195 globalSamples.insert(f_sampled_row[count]);
199 CAROM_VERIFY(count == numCol);
202 std::vector<double> rg(myid == 0 ? nglobal : 0);
203 std::vector<int> isort(myid == 0 ? nglobal : 0);
212 for (
int s = numCol; s < num_samples_req; ++s)
224 for (
int i=0; i<m; ++i)
226 for (
int j=0; j<n; ++j)
228 A.
getData()[i + (j*m)] = sampled_row_data[(i*numCol) + j];
243 MPI_Bcast(&g, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
246 MPI_Bcast(V.
getData(), n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
249 std::unique_ptr<Matrix> Ubt = f_basis.
mult(V);
251 CAROM_VERIFY(Ubt->distributed() && Ubt->numRows() == f_basis.
numRows()
252 && Ubt->numColumns() == n);
254 std::vector<double> r(nf);
256 for (
int i=0; i<nf; ++i)
260 for (
int j=0; j<n; ++j)
261 r[i] += Ubt->item(i, j) * Ubt->item(i,j);
263 r[i] -= sqrt((r[i] * r[i]) - (4.0 * g * Ubt->item(i, n-1) * Ubt->item(i, n-1)));
266 MPI_Gatherv(r.data(), nf, MPI_DOUBLE, rg.data(), rcnt.data(), rdsp.data(),
267 MPI_DOUBLE, 0, MPI_COMM_WORLD);
272 for (
int i=0; i<nglobal; ++i)
277 std::sort(isort.begin(), isort.end(), [&](
const int& a,
const int& b) {
278 return (rg[a] > rg[b]);
283 f_sampled_row[s] = -1;
284 for (
int i=0; i<nglobal; ++i)
286 std::set<int>::iterator it = globalSamples.find(isort[i]);
287 if (it == globalSamples.end())
289 CAROM_VERIFY(i <= s && f_sampled_row[s] == -1);
290 f_sampled_row[s] = isort[i];
295 CAROM_VERIFY(f_sampled_row[s] >= 0);
296 for (
int i=num_procs-1; i >= 0; --i)
298 if (f_sampled_row[s] >= row_offset[i])
305 CAROM_VERIFY(owner >= 0);
306 f_sampled_rows_per_proc[owner]++;
307 f_sampled_row_owner[s] = owner;
309 for (
int i=0; i < num_procs; ++i)
312 ns[owner] = f_sampled_row[s];
313 globalSamples.insert(f_sampled_row[s]);
321 MPI_Scatter(ns.data(), 1, MPI_INT, &sample, 1, MPI_INT, 0, MPI_COMM_WORLD);
323 const int tagSendRecv = 111;
326 CAROM_VERIFY(sample >= row_offset[myid]);
327 MPI_Send(f_basis.
getData() + ((sample - row_offset[myid]) * numCol),
328 numCol, MPI_DOUBLE, 0, tagSendRecv, MPI_COMM_WORLD);
334 MPI_Recv(sampled_row_data.data() + (s*numCol), numCol, MPI_DOUBLE,
335 owner, tagSendRecv, MPI_COMM_WORLD, &status);
345 for (
int r=1; r<num_procs; ++r)
348 disp[r] = disp[r-1] + f_sampled_rows_per_proc[r-1];
351 for (
int i=0; i<num_samples_req; ++i)
353 const int owner = f_sampled_row_owner[i];
354 f_sampled_row[i] -= row_offset[owner];
356 all_sampled_rows[disp[owner] + ns[owner]] = i;
360 std::vector<int> sortedRow(num_samples_req);
362 for (
int r=0; r<num_procs; ++r)
364 CAROM_VERIFY(ns[r] == f_sampled_rows_per_proc[r]);
367 for (
int i=0; i<ns[r]; ++i)
372 std::sort(isort.begin(), isort.begin() + ns[r], [&](
const int& a,
374 return (f_sampled_row[all_sampled_rows[disp[r] + a]] <
375 f_sampled_row[all_sampled_rows[disp[r] + b]]);
379 for (
int i=0; i<ns[r]; ++i)
381 const int ig = all_sampled_rows[disp[r] + isort[i]];
382 const int s = (disp[r] + i);
386 for (
int j=0; j<numCol; ++j)
388 f_basis_sampled_inv(s, j) = sampled_row_data[(ig*numCol) + j];
391 sortedRow[s] = f_sampled_row[ig];
395 for (
int i=0; i<num_samples_req; ++i)
396 f_sampled_row[i] = sortedRow[i];
399 MPI_Bcast(f_sampled_row.data(), num_samples_req, MPI_INT, 0, MPI_COMM_WORLD);
400 MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
407 for (
int i = 0; i < num_samples_req_QR; i++) {
408 for (
int j = 0; j < numCol; j++) {
409 f_basis_sampled_inv(i, j) = f_basis(f_sampled_row[i], j);
413 f_sampled_rows_per_proc[0] = numCol;
419 CAROM_VERIFY(numCol == num_samples_req);
double * getData() const
Get the matrix data as a pointer.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
int numDistributedRows() const
Returns the number of rows of the Matrix across all processors.
int numRows() const
Returns the number of rows of the Matrix on this processor.
std::unique_ptr< Matrix > mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product.
void transpose()
Replaces this Matrix with its transpose (in place), in the serial square case only.
void qrcp_pivots_transpose(int *row_pivot, int *row_pivot_owner, int pivots_requested) const
Compute the leading numColumns() column pivots from a QR decomposition with column pivots (QRCP) of t...
double * getData() const
Get the vector data as a pointer.
void SerialSVD(Matrix *A, Matrix *U, Vector *S, Matrix *V)
Computes the SVD of a undistributed matrix in serial.
void QDEIM(const Matrix &f_basis, int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req)
Computes the QDEIM algorithm on the given basis.