libROM  v1.0
Data-driven physical simulation library
QDEIM.cpp
1 
11 // Description: Interface to the QDEIM algorithm to determine the rows of the
12 // rhs to be sampled for the interpolation of the rhs.
13 
14 #include "linalg/Matrix.h"
15 #include "mpi.h"
16 #include <cmath>
17 
18 #include <vector>
19 #include <set>
20 #include <algorithm>
21 
22 namespace CAROM {
23 
24 void
25 QDEIM(const Matrix* f_basis,
26  int num_f_basis_vectors_used,
27  std::vector<int>& f_sampled_row,
28  std::vector<int>& f_sampled_rows_per_proc,
29  Matrix& f_basis_sampled_inv,
30  const int myid,
31  const int num_procs,
32  const int num_samples_req)
33 {
34  CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
35 
36  // This algorithm determines the rows of f that should be sampled, the
37  // processor that owns each sampled row, and fills f_basis_sampled_inv with the
38  // sampled rows of the basis of the RHS.
39 
40  // The QR implementation uses the entire matrix.
41  CAROM_VERIFY(num_f_basis_vectors_used == f_basis->numColumns());
42  CAROM_VERIFY(f_basis->numColumns() <= num_samples_req
43  && num_samples_req <= f_basis->numDistributedRows());
44  CAROM_VERIFY(num_samples_req == f_basis_sampled_inv.numRows()
45  && f_basis->numColumns() == f_basis_sampled_inv.numColumns());
46  CAROM_VERIFY(num_samples_req == f_sampled_row.size());
47  CAROM_VERIFY(!f_basis_sampled_inv.distributed());
48 
49  // QR will determine (numCol) pivots, which will define the first (numCol) samples.
50  const int numCol = f_basis->numColumns();
51  const int num_samples_req_QR = numCol;
52 
53  std::vector<double> sampled_row_data;
54  if (myid == 0) sampled_row_data.resize(num_samples_req * numCol);
55 
56  std::vector<int> f_sampled_row_owner((myid == 0
57  && f_basis->distributed()) ? num_samples_req : 0);
58 
59  // QDEIM computes selection/interpolation indices by taking a
60  // column-pivoted QR-decomposition of the transpose of its input matrix.
61  f_basis->qrcp_pivots_transpose(f_sampled_row.data(),
62  f_sampled_row_owner.data(),
63  num_samples_req_QR);
64 
65  if (f_basis->distributed())
66  {
67  // Gather the sampled rows to root process in sampled_row_data
68 
69  // On root, f_sampled_row contains all global pivots.
70  // On non-root processes, it contains the local pivots.
71 
72  std::vector<int> ns((myid == 0) ? num_procs : 0);
73  std::vector<int> disp((myid == 0) ? num_procs : 0);
74  std::vector<int> all_sampled_rows((myid == 0) ? num_samples_req : 0);
75  if (myid == 0)
76  {
77  for (int r=0; r<num_procs; ++r)
78  ns[r] = 0;
79 
80  for (int i=0; i<num_samples_req_QR; ++i)
81  ns[f_sampled_row_owner[i]]++;
82 
83  disp[0] = 0;
84  for (int r=1; r<num_procs; ++r)
85  disp[r] = disp[r-1] + ns[r-1];
86 
87  CAROM_VERIFY(disp[num_procs-1] + ns[num_procs-1] == num_samples_req_QR);
88 
89  for (int r=0; r<num_procs; ++r)
90  {
91  f_sampled_rows_per_proc[r] = ns[r];
92  ns[r] = 0;
93  }
94 
95  for (int i=0; i<num_samples_req_QR; ++i)
96  {
97  const int owner = f_sampled_row_owner[i];
98  all_sampled_rows[disp[owner] + ns[owner]] = f_sampled_row[i];
99  ns[owner]++;
100  }
101 
102  // Reorder f_sampled_row and f_sampled_row_owner to match the order
103  // of f_basis_sampled_inv
104  for (int i=0; i<num_samples_req_QR; ++i)
105  f_sampled_row[i] = all_sampled_rows[i];
106 
107  int os = 0;
108  for (int r=0; r<num_procs; ++r)
109  {
110  for (int i=0; i<f_sampled_rows_per_proc[r]; ++i)
111  {
112  f_sampled_row_owner[os + i] = r;
113  }
114 
115  os += f_sampled_rows_per_proc[r];
116  }
117  }
118 
119  MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
120  MPI_COMM_WORLD);
121 
122  int count = 0;
123  MPI_Scatter(ns.data(), 1, MPI_INT, &count, 1, MPI_INT, 0, MPI_COMM_WORLD);
124 
125  std::vector<int> my_sampled_rows(count);
126  std::vector<double> my_sampled_row_data(count*numCol);
127 
128  MPI_Scatterv(all_sampled_rows.data(), ns.data(), disp.data(), MPI_INT,
129  my_sampled_rows.data(), count, MPI_INT, 0, MPI_COMM_WORLD);
130 
131  std::vector<int> row_offset(num_procs);
132  row_offset[myid] = f_basis->numRows();
133 
134  CAROM_VERIFY(MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, row_offset.data(), 1,
135  MPI_INT, MPI_COMM_WORLD) == MPI_SUCCESS);
136 
137  int os = 0;
138  for (int i=0; i<num_procs; ++i)
139  {
140  os += row_offset[i];
141  row_offset[i] = os - row_offset[i];
142  }
143 
144  for (int i=0; i<count; ++i)
145  {
146  const int msr = my_sampled_rows[i];
147  const bool mycond = my_sampled_rows[i] >= row_offset[myid]
148  && my_sampled_rows[i] < row_offset[myid] + f_basis->numRows();
149  CAROM_VERIFY(my_sampled_rows[i] >= row_offset[myid]
150  && my_sampled_rows[i] < row_offset[myid] + f_basis->numRows());
151  const int row = my_sampled_rows[i] - row_offset[myid];
152  os = i*numCol;
153  for (int j=0; j<numCol; ++j)
154  my_sampled_row_data[os + j] = f_basis->item(row, j);
155  }
156 
157  if (myid == 0)
158  {
159  for (int r=0; r<num_procs; ++r)
160  {
161  ns[r] *= numCol;
162  disp[r] *= numCol;
163  }
164  }
165 
166  MPI_Gatherv(my_sampled_row_data.data(), count*numCol, MPI_DOUBLE,
167  sampled_row_data.data(),
168  ns.data(), disp.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
169 
170  const int nf = f_basis->numRows();
171 
172  std::vector<int> rcnt(num_procs);
173  std::vector<int> rdsp(num_procs);
174  MPI_Gather(&nf, 1, MPI_INT, rcnt.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
175 
176  int nglobal = 0;
177  rdsp[0] = 0;
178  if (myid == 0)
179  {
180  for (int i=0; i<num_procs; ++i)
181  nglobal += rcnt[i];
182 
183  for (int i=1; i<num_procs; ++i)
184  rdsp[i] = rdsp[i-1] + rcnt[i-1];
185  }
186 
187  std::set<int> globalSamples;
188  if (myid == 0)
189  {
190  int count = 0;
191  for (int i=0; i<num_procs; ++i)
192  {
193  for (int j=0; j<f_sampled_rows_per_proc[i]; ++j, ++count)
194  {
195  globalSamples.insert(f_sampled_row[count]);
196  }
197  }
198 
199  CAROM_VERIFY(count == numCol);
200  }
201 
202  std::vector<double> rg(myid == 0 ? nglobal : 0);
203  std::vector<int> isort(myid == 0 ? nglobal : 0);
204 
205  int n = numCol;
206  Matrix V(n, n, false);
207 
208  // At this point, only the first (numCol) entries of f_sampled_row and
209  // rows of sampled_row_data are set by QR. Now set the remaining
210  // (num_samples_req - numCol) samples by GappyPOD+E.
211 
212  for (int s = numCol; s < num_samples_req; ++s) // Determine sample s
213  {
214  int m = s;
215 
216  double g = 0.0;
217  if (myid == 0)
218  {
219  Matrix A(m, n, false);
220 
221  // Compute SVD of the first s rows of sampled_row_data locally on root
222  // Use lapack's dgesdd Fortran function to perform the SVD. As this is
223  // Fortran, A and all the computed matrices are in column major order.
224  for (int i=0; i<m; ++i)
225  {
226  for (int j=0; j<n; ++j)
227  {
228  A.getData()[i + (j*m)] = sampled_row_data[(i*numCol) + j];
229  }
230  }
231  Matrix* U = NULL;
232  Vector sigma(n, false);
233  SerialSVD(&A, U, &sigma, &V);
234  delete U;
235 
236  g = (sigma.getData()[n-2] * sigma.getData()[n-2]) -
237  (sigma.getData()[n-1] * sigma.getData()[n-1]);
238 
239  // Note that V stores the right singular vectors row-wise
240  V.transpose();
241  }
242 
243  MPI_Bcast(&g, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
244 
245  // Broadcast the small n-by-n undistributed matrix V which is computed only on root.
246  MPI_Bcast(V.getData(), n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
247 
248  // Set Ubt = U * V = (V' * U')'
249  Matrix *Ubt = f_basis->mult(V); // distributed
250 
251  CAROM_VERIFY(Ubt->distributed() && Ubt->numRows() == f_basis->numRows()
252  && Ubt->numColumns() == n);
253 
254  std::vector<double> r(nf);
255 
256  for (int i=0; i<nf; ++i)
257  {
258  r[i] = g;
259  // column sums of Ub.^2, which are row sums of Ubt.^2
260  for (int j=0; j<n; ++j)
261  r[i] += Ubt->item(i, j) * Ubt->item(i,j);
262 
263  r[i] -= sqrt((r[i] * r[i]) - (4.0 * g * Ubt->item(i, n-1) * Ubt->item(i, n-1)));
264  }
265 
266  MPI_Gatherv(r.data(), nf, MPI_DOUBLE, rg.data(), rcnt.data(), rdsp.data(),
267  MPI_DOUBLE, 0, MPI_COMM_WORLD);
268 
269  int owner = -1;
270  if (myid == 0)
271  {
272  for (int i=0; i<nglobal; ++i)
273  {
274  isort[i] = i;
275  }
276 
277  std::sort(isort.begin(), isort.end(), [&](const int& a, const int& b) {
278  return (rg[a] > rg[b]);
279  }
280  ); // descending order
281 
282  // Choose sample s as the first entry in isort not already in f_sampled_row
283  f_sampled_row[s] = -1;
284  for (int i=0; i<nglobal; ++i)
285  {
286  std::set<int>::iterator it = globalSamples.find(isort[i]);
287  if (it == globalSamples.end()) // not found
288  {
289  CAROM_VERIFY(i <= s && f_sampled_row[s] == -1);
290  f_sampled_row[s] = isort[i];
291  break;
292  }
293  }
294 
295  CAROM_VERIFY(f_sampled_row[s] >= 0);
296  for (int i=num_procs-1; i >= 0; --i)
297  {
298  if (f_sampled_row[s] >= row_offset[i])
299  {
300  owner = i;
301  break;
302  }
303  }
304 
305  CAROM_VERIFY(owner >= 0);
306  f_sampled_rows_per_proc[owner]++;
307  f_sampled_row_owner[s] = owner;
308 
309  for (int i=0; i < num_procs; ++i)
310  ns[i] = -1;
311 
312  ns[owner] = f_sampled_row[s];
313  globalSamples.insert(f_sampled_row[s]);
314  }
315 
316  // Send one row of f_basis, corresponding to sample s, to the root
317  // process for f_basis_sampled_inv. First, scatter from root to tell
318  // the owning process the sample index.
319 
320  int sample = -1;
321  MPI_Scatter(ns.data(), 1, MPI_INT, &sample, 1, MPI_INT, 0, MPI_COMM_WORLD);
322 
323  const int tagSendRecv = 111;
324  if (sample > -1)
325  {
326  CAROM_VERIFY(sample >= row_offset[myid]);
327  MPI_Send(f_basis->getData() + ((sample - row_offset[myid]) * numCol),
328  numCol, MPI_DOUBLE, 0, tagSendRecv, MPI_COMM_WORLD);
329  }
330 
331  if (myid == 0)
332  {
333  MPI_Status status;
334  MPI_Recv(sampled_row_data.data() + (s*numCol), numCol, MPI_DOUBLE,
335  owner, tagSendRecv, MPI_COMM_WORLD, &status);
336  }
337 
338  delete Ubt;
339  } // loop s over samples
340 
341  // Subtract row_offset to convert f_sampled_row from global to local indices.
342  // Also, reorder f_sampled_row by process.
343  if (myid == 0)
344  {
345  ns[0] = 0;
346  disp[0] = 0;
347  for (int r=1; r<num_procs; ++r)
348  {
349  ns[r] = 0;
350  disp[r] = disp[r-1] + f_sampled_rows_per_proc[r-1];
351  }
352 
353  for (int i=0; i<num_samples_req; ++i)
354  {
355  const int owner = f_sampled_row_owner[i];
356  f_sampled_row[i] -= row_offset[owner];
357 
358  all_sampled_rows[disp[owner] + ns[owner]] = i;
359  ns[owner]++;
360  }
361 
362  std::vector<int> sortedRow(num_samples_req);
363 
364  for (int r=0; r<num_procs; ++r)
365  {
366  CAROM_VERIFY(ns[r] == f_sampled_rows_per_proc[r]);
367 
368  // Sort the local indices of f_sampled_row for process r.
369  for (int i=0; i<ns[r]; ++i)
370  {
371  isort[i] = i;
372  }
373 
374  std::sort(isort.begin(), isort.begin() + ns[r], [&](const int& a,
375  const int& b) {
376  return (f_sampled_row[all_sampled_rows[disp[r] + a]] <
377  f_sampled_row[all_sampled_rows[disp[r] + b]]);
378  }
379  ); // ascending order locally
380 
381  for (int i=0; i<ns[r]; ++i)
382  {
383  const int ig = all_sampled_rows[disp[r] + isort[i]];
384  const int s = (disp[r] + i);
385  // Put row ig of sampled_row_data into row s of f_basis_sampled_inv
386  // Put entry ig of f_sampled_row into entry s of sortedRow
387 
388  for (int j=0; j<numCol; ++j)
389  {
390  f_basis_sampled_inv.item(s, j) = sampled_row_data[(ig*numCol) + j];
391  }
392 
393  sortedRow[s] = f_sampled_row[ig];
394  }
395  }
396 
397  for (int i=0; i<num_samples_req; ++i)
398  f_sampled_row[i] = sortedRow[i];
399  }
400 
401  MPI_Bcast(f_sampled_row.data(), num_samples_req, MPI_INT, 0, MPI_COMM_WORLD);
402  MPI_Bcast(f_sampled_rows_per_proc.data(), num_procs, MPI_INT, 0,
403  MPI_COMM_WORLD);
404  }
405  else
406  {
407  // With the known interpolation (sample) indices, copy over the
408  // rows of the sampled basis
409  for (int i = 0; i < num_samples_req_QR; i++) {
410  for (int j = 0; j < numCol; j++) {
411  f_basis_sampled_inv.item(i, j) = f_basis->item(f_sampled_row[i], j);
412  }
413  }
414 
415  f_sampled_rows_per_proc[0] = numCol;
416  }
417 
418  if (!f_basis->distributed())
419  {
420  // GappyPOD+E not implemented for oversampling if not distributed
421  CAROM_VERIFY(numCol == num_samples_req);
422  }
423 
424  // Now invert f_basis_sampled_inv, storing its transpose.
425  if (myid == 0) // Matrix is valid only on root process
426  f_basis_sampled_inv.transposePseudoinverse();
427 } // end void QDEIM
428 
429 } // end namespace CAROM
double * getData() const
Get the matrix data as a pointer.
Definition: Matrix.h:1107
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
Definition: Matrix.cpp:882
bool distributed() const
Returns true if the Matrix is distributed.
Definition: Matrix.h:177
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Definition: Matrix.h:220
Matrix * mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product, reference version.
Definition: Matrix.h:283
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
Definition: Matrix.h:1007
int numRows() const
Returns the number of rows of the Matrix on this processor.
Definition: Matrix.h:194
void transpose()
Replaces this Matrix with its transpose (in place), in the serial square case only.
Definition: Matrix.cpp:824
void qrcp_pivots_transpose(int *row_pivot, int *row_pivot_owner, int pivots_requested) const
Compute the leading numColumns() column pivots from a QR decomposition with column pivots (QRCP) of t...
Definition: Matrix.cpp:1198
double * getData() const
Get the vector data as a pointer.
Definition: Vector.h:748
void SerialSVD(Matrix *A, Matrix *U, Vector *S, Matrix *V)
Computes the SVD of a undistributed matrix in serial.
Definition: Matrix.cpp:2279
void QDEIM(const Matrix *f_basis, int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req)
Computes the QDEIM algorithm on the given basis.
Definition: QDEIM.cpp:25