14 #include "linalg/Matrix.h"
26 int num_f_basis_vectors_used,
27 std::vector<int>& f_sampled_row,
28 std::vector<int>& f_sampled_rows_per_proc,
29 Matrix& f_basis_sampled_inv,
32 const int num_samples_req)
34 CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
41 CAROM_VERIFY(num_f_basis_vectors_used == f_basis->
numColumns());
42 CAROM_VERIFY(f_basis->
numColumns() <= num_samples_req
43 && num_samples_req <= f_basis->numDistributedRows());
44 CAROM_VERIFY(num_samples_req == f_basis_sampled_inv.
numRows()
46 CAROM_VERIFY(num_samples_req == f_sampled_row.size());
51 const int num_samples_req_QR = numCol;
53 std::vector<double> sampled_row_data;
54 if (myid == 0) sampled_row_data.resize(num_samples_req * numCol);
56 std::vector<int> f_sampled_row_owner((myid == 0
62 f_sampled_row_owner.data(),
72 std::vector<int> ns((myid == 0) ? num_procs : 0);
73 std::vector<int> disp((myid == 0) ? num_procs : 0);
74 std::vector<int> all_sampled_rows((myid == 0) ? num_samples_req : 0);
77 for (
int r=0; r<num_procs; ++r)
80 for (
int i=0; i<num_samples_req_QR; ++i)
81 ns[f_sampled_row_owner[i]]++;
84 for (
int r=1; r<num_procs; ++r)
85 disp[r] = disp[r-1] + ns[r-1];
87 CAROM_VERIFY(disp[num_procs-1] + ns[num_procs-1] == num_samples_req_QR);
89 for (
int r=0; r<num_procs; ++r)
91 f_sampled_rows_per_proc[r] = ns[r];
95 for (
int i=0; i<num_samples_req_QR; ++i)
97 const int owner = f_sampled_row_owner[i];
98 all_sampled_rows[disp[owner] + ns[owner]] = f_sampled_row[i];
104 for (
int i=0; i<num_samples_req_QR; ++i)
105 f_sampled_row[i] = all_sampled_rows[i];
108 for (
int r=0; r<num_procs; ++r)
110 for (
int i=0; i<f_sampled_rows_per_proc[r]; ++i)
112 f_sampled_row_owner[os + i] = r;
115 os += f_sampled_rows_per_proc[r];
119 MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
123 MPI_Scatter(ns.data(), 1, MPI_INT, &count, 1, MPI_INT, 0, MPI_COMM_WORLD);
125 std::vector<int> my_sampled_rows(count);
126 std::vector<double> my_sampled_row_data(count*numCol);
128 MPI_Scatterv(all_sampled_rows.data(), ns.data(), disp.data(), MPI_INT,
129 my_sampled_rows.data(), count, MPI_INT, 0, MPI_COMM_WORLD);
131 std::vector<int> row_offset(num_procs);
132 row_offset[myid] = f_basis->
numRows();
134 CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, row_offset.data(), 1,
135 MPI_INT, MPI_COMM_WORLD) == MPI_SUCCESS);
138 for (
int i=0; i<num_procs; ++i)
141 row_offset[i] = os - row_offset[i];
144 for (
int i=0; i<count; ++i)
146 const int msr = my_sampled_rows[i];
147 const bool mycond = my_sampled_rows[i] >= row_offset[myid]
148 && my_sampled_rows[i] < row_offset[myid] + f_basis->
numRows();
149 CAROM_VERIFY(my_sampled_rows[i] >= row_offset[myid]
150 && my_sampled_rows[i] < row_offset[myid] + f_basis->
numRows());
151 const int row = my_sampled_rows[i] - row_offset[myid];
153 for (
int j=0; j<numCol; ++j)
154 my_sampled_row_data[os + j] = f_basis->
item(row, j);
159 for (
int r=0; r<num_procs; ++r)
166 MPI_Gatherv(my_sampled_row_data.data(), count*numCol, MPI_DOUBLE,
167 sampled_row_data.data(),
168 ns.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
170 const int nf = f_basis->
numRows();
172 std::vector<int> rcnt(num_procs);
173 std::vector<int> rdsp(num_procs);
174 MPI_Gather(&nf, 1, MPI_INT, rcnt.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
180 for (
int i=0; i<num_procs; ++i)
183 for (
int i=1; i<num_procs; ++i)
184 rdsp[i] = rdsp[i-1] + rcnt[i-1];
187 std::set<int> globalSamples;
191 for (
int i=0; i<num_procs; ++i)
193 for (
int j=0; j<f_sampled_rows_per_proc[i]; ++j, ++count)
195 globalSamples.insert(f_sampled_row[count]);
199 CAROM_VERIFY(count == numCol);
202 std::vector<double> rg(myid == 0 ? nglobal : 0);
203 std::vector<int> isort(myid == 0 ? nglobal : 0);
212 for (
int s = numCol; s < num_samples_req; ++s)
224 for (
int i=0; i<m; ++i)
226 for (
int j=0; j<n; ++j)
228 A.
getData()[i + (j*m)] = sampled_row_data[(i*numCol) + j];
243 MPI_Bcast(&g, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
246 MPI_Bcast(V.
getData(), n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
254 std::vector<double> r(nf);
256 for (
int i=0; i<nf; ++i)
260 for (
int j=0; j<n; ++j)
261 r[i] += Ubt->
item(i, j) * Ubt->
item(i,j);
263 r[i] -= sqrt((r[i] * r[i]) - (4.0 * g * Ubt->
item(i, n-1) * Ubt->
item(i, n-1)));
266 MPI_Gatherv(r.data(), nf, MPI_DOUBLE, rg.data(), rcnt.data(), rdsp.data(),
267 MPI_DOUBLE, 0, MPI_COMM_WORLD);
272 for (
int i=0; i<nglobal; ++i)
277 std::sort(isort.begin(), isort.end(), [&](
const int& a,
const int& b) {
278 return (rg[a] > rg[b]);
283 f_sampled_row[s] = -1;
284 for (
int i=0; i<nglobal; ++i)
286 std::set<int>::iterator it = globalSamples.find(isort[i]);
287 if (it == globalSamples.end())
289 CAROM_VERIFY(i <= s && f_sampled_row[s] == -1);
290 f_sampled_row[s] = isort[i];
295 CAROM_VERIFY(f_sampled_row[s] >= 0);
296 for (
int i=num_procs-1; i >= 0; --i)
298 if (f_sampled_row[s] >= row_offset[i])
305 CAROM_VERIFY(owner >= 0);
306 f_sampled_rows_per_proc[owner]++;
307 f_sampled_row_owner[s] = owner;
309 for (
int i=0; i < num_procs; ++i)
312 ns[owner] = f_sampled_row[s];
313 globalSamples.insert(f_sampled_row[s]);
321 MPI_Scatter(ns.data(), 1, MPI_INT, &sample, 1, MPI_INT, 0, MPI_COMM_WORLD);
323 const int tagSendRecv = 111;
326 CAROM_VERIFY(sample >= row_offset[myid]);
327 MPI_Send(f_basis->
getData() + ((sample - row_offset[myid]) * numCol),
328 numCol, MPI_DOUBLE, 0, tagSendRecv, MPI_COMM_WORLD);
334 MPI_Recv(sampled_row_data.data() + (s*numCol), numCol, MPI_DOUBLE,
335 owner, tagSendRecv, MPI_COMM_WORLD, &status);
347 for (
int r=1; r<num_procs; ++r)
350 disp[r] = disp[r-1] + f_sampled_rows_per_proc[r-1];
353 for (
int i=0; i<num_samples_req; ++i)
355 const int owner = f_sampled_row_owner[i];
356 f_sampled_row[i] -= row_offset[owner];
358 all_sampled_rows[disp[owner] + ns[owner]] = i;
362 std::vector<int> sortedRow(num_samples_req);
364 for (
int r=0; r<num_procs; ++r)
366 CAROM_VERIFY(ns[r] == f_sampled_rows_per_proc[r]);
369 for (
int i=0; i<ns[r]; ++i)
374 std::sort(isort.begin(), isort.begin() + ns[r], [&](
const int& a,
376 return (f_sampled_row[all_sampled_rows[disp[r] + a]] <
377 f_sampled_row[all_sampled_rows[disp[r] + b]]);
381 for (
int i=0; i<ns[r]; ++i)
383 const int ig = all_sampled_rows[disp[r] + isort[i]];
384 const int s = (disp[r] + i);
388 for (
int j=0; j<numCol; ++j)
390 f_basis_sampled_inv.
item(s, j) = sampled_row_data[(ig*numCol) + j];
393 sortedRow[s] = f_sampled_row[ig];
397 for (
int i=0; i<num_samples_req; ++i)
398 f_sampled_row[i] = sortedRow[i];
401 MPI_Bcast(f_sampled_row.data(), num_samples_req, MPI_INT, 0, MPI_COMM_WORLD);
402 MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
409 for (
int i = 0; i < num_samples_req_QR; i++) {
410 for (
int j = 0; j < numCol; j++) {
411 f_basis_sampled_inv.
item(i, j) = f_basis->
item(f_sampled_row[i], j);
415 f_sampled_rows_per_proc[0] = numCol;
421 CAROM_VERIFY(numCol == num_samples_req);
double * getData() const
Get the matrix data as a pointer.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Matrix * mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product, reference version.
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
void transpose()
Replaces this Matrix with its transpose (in place), in the serial square case only.
void qrcp_pivots_transpose(int *row_pivot, int *row_pivot_owner, int pivots_requested) const
Compute the leading numColumns() column pivots from a QR decomposition with column pivots (QRCP) of t...
double * getData() const
Get the vector data as a pointer.
void SerialSVD(Matrix *A, Matrix *U, Vector *S, Matrix *V)
Computes the SVD of a undistributed matrix in serial.
void QDEIM(const Matrix *f_basis, int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req)
Computes the QDEIM algorithm on the given basis.