libROM  v1.0
Data-driven physical simulation library
S_OPT.cpp
// Description: Interface to the S_OPT algorithm to determine the rows of the
//              basis to be sampled for the interpolation of the basis.

#include "linalg/Matrix.h"
#include "Utilities.h"
#include "mpi.h"
#include <cmath>
#include <vector>
#include <map>
#include <set>
#include <cfloat>

#include "S_OPT.h"

using namespace std;

namespace CAROM {

void
S_OPT(const Matrix* f_basis,
      int num_f_basis_vectors_used,
      std::vector<int>& f_sampled_row,
      std::vector<int>& f_sampled_rows_per_proc,
      Matrix& f_basis_sampled_inv,
      const int myid,
      const int num_procs,
      const int num_samples_req,
      std::vector<int>* init_samples,
      bool qr_factorize)
{
    CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
    // This algorithm determines the rows of f that should be sampled, the
    // processor that owns each sampled row, and fills f_basis_sampled_inv with
    // the inverse of the sampled rows of the basis.

    // Create an MPI_Datatype for the RowInfo struct.
    MPI_Datatype MaxRowType, oldtypes[2];
    int blockcounts[2];
    MPI_Aint offsets[2], extent, lower_bound;
    MPI_Status stat;
    offsets[0] = 0;
    oldtypes[0] = MPI_DOUBLE;
    blockcounts[0] = 1;

    /*
      MPI_Type_extent is deprecated in MPI-2 and removed in
      MPI-3. MPI_Type_extent has been replaced by MPI_Type_get_extent.
    */
#if (MPI_VERSION == 1)
    MPI_Type_extent(MPI_DOUBLE, &extent);
#else
    MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
#endif

    offsets[1] = extent;
    oldtypes[1] = MPI_INT;
    blockcounts[1] = 2;
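    // The resulting layout -- one MPI_DOUBLE followed by two MPI_INTs -- is
    // assumed to match the RowInfo struct fields (row_val, row, proc) used in
    // the custom reductions below.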

    /*
      MPI_Type_struct is deprecated in MPI-2 and removed in
      MPI-3. MPI_Type_struct has been replaced by MPI_Type_create_struct.
    */
#if (MPI_VERSION == 1)
    MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
#else
    MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
#endif
    MPI_Type_commit(&MaxRowType);

    // Create an MPI_Op for the RowInfoMax function.
    MPI_Op RowInfoOp;
    MPI_Op_create((MPI_User_function*)RowInfoMax, true, &RowInfoOp);
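    // RowInfoOp is used below as an argmax-style reduction: of two RowInfo
    // entries it keeps the one with the larger row_val, together with its row
    // index and owning rank.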

    // Get the number of basis vectors and the size of each basis vector.
    CAROM_VERIFY(0 < num_f_basis_vectors_used
                 && num_f_basis_vectors_used <= f_basis->numColumns());
    const int num_basis_vectors =
        std::min(num_f_basis_vectors_used, f_basis->numColumns());
    const int num_samples = num_samples_req > 0 ? num_samples_req :
                            num_basis_vectors;
    CAROM_VERIFY(num_basis_vectors <= num_samples
                 && num_samples <= f_basis->numDistributedRows());
    CAROM_VERIFY(num_samples == f_sampled_row.size());
    CAROM_VERIFY(num_samples == f_basis_sampled_inv.numRows() &&
                 num_basis_vectors == f_basis_sampled_inv.numColumns());
    CAROM_VERIFY(!f_basis_sampled_inv.distributed());

    int num_rows = f_basis->numRows();

    // If num_basis_vectors is less than the number of columns of the basis,
    // we need to truncate the basis.
    const Matrix* f_basis_truncated = NULL;
    if (num_basis_vectors < f_basis->numColumns())
    {
        f_basis_truncated = f_basis->getFirstNColumns(num_basis_vectors);
    }
    else
    {
        f_basis_truncated = f_basis;
    }

    const Matrix* Vo = NULL;

    // Use the QR factorization of the input matrix, if requested.
    if (qr_factorize)
    {
        Vo = f_basis_truncated->qr_factorize();
    }
    else
    {
        Vo = f_basis_truncated;
    }
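
    // Note: when qr_factorize is false, Vo simply aliases f_basis_truncated;
    // the cleanup at the end of this routine only deletes Vo when a new Q
    // matrix was actually allocated here.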

    int num_samples_obtained = 0;

    // Scratch space used throughout the algorithm.
    std::vector<double> c(num_basis_vectors);

    vector<set<int> > proc_sampled_f_row(num_procs);
    vector<map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
    int num_f_basis_cols = f_basis_sampled_inv.numColumns();
    Matrix V1(num_samples,
              num_basis_vectors,
              false);
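
    // V1 accumulates the sampled rows of Vo in the order they are selected
    // (referred to as tmp_fs in the comments below); proc_sampled_f_row and
    // proc_f_row_to_tmp_fs_row record, per processor, which local rows were
    // sampled and where each one landed in V1.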

    RowInfo f_bv_max_local, f_bv_max_global;

    // Gather information about initial samples given as input.
    const int num_init_samples = init_samples ? init_samples->size() : 0;
    int total_num_init_samples = 0;
    MPI_Allreduce(&num_init_samples, &total_num_init_samples, 1,
                  MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    CAROM_VERIFY(num_samples >= total_num_init_samples);

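    // Seed the selection with the user-provided initial samples. Each pass of
    // the loop admits one initial sample: ranks that still have unused local
    // initial samples bid with row_val = 1.0, all others bid with -DBL_MAX, so
    // the reduction picks a rank that still has a sample to contribute.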
    int init_sample_offset = 0;
    for (int i = 0; i < total_num_init_samples; i++)
    {
        f_bv_max_local.row_val = -DBL_MAX;
        f_bv_max_local.proc = myid;
        if (init_sample_offset < num_init_samples)
        {
            f_bv_max_local.row_val = 1.0;
            f_bv_max_local.row = (*init_samples)[init_sample_offset];
        }
        MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
                      MaxRowType, RowInfoOp, MPI_COMM_WORLD);
        // Now get the newly selected row of the basis.
        if (f_bv_max_global.proc == myid) {
            for (int j = 0; j < num_basis_vectors; ++j) {
                c[j] = Vo->item(f_bv_max_global.row, j);
            }
            init_sample_offset++;
        }
        MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
                  f_bv_max_global.proc, MPI_COMM_WORLD);
        // Now add the newly selected row of the basis to tmp_fs.
        for (int j = 0; j < num_basis_vectors; ++j) {
            V1.item(num_samples_obtained, j) = c[j];
        }
        proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
        proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
            num_samples_obtained;
        num_samples_obtained++;
    }

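    // If no initial samples were provided, bootstrap the greedy selection with
    // the row whose entry in the first basis vector has the largest magnitude.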
    if (num_samples_obtained == 0)
    {
        f_bv_max_local.row_val = -DBL_MAX;
        f_bv_max_local.proc = myid;
        for (int i = 0; i < num_rows; ++i) {
            double f_bv_val = fabs(Vo->item(i, 0));
            if (f_bv_val > f_bv_max_local.row_val) {
                f_bv_max_local.row_val = f_bv_val;
                f_bv_max_local.row = i;
            }
        }
        MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
                      MaxRowType, RowInfoOp, MPI_COMM_WORLD);
        // Now get the first sampled row of the basis.
        if (f_bv_max_global.proc == myid) {
            for (int j = 0; j < num_basis_vectors; ++j) {
                c[j] = Vo->item(f_bv_max_global.row, j);
            }
        }
        MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
                  f_bv_max_global.proc, MPI_COMM_WORLD);
        // Now add the first sampled row of the basis to tmp_fs.
        for (int j = 0; j < num_basis_vectors; ++j) {
            V1.item(0, j) = c[j];
        }
        proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
        proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = 0;
        num_samples_obtained++;
    }

    if (num_samples_obtained < num_samples)
    {
        Vector* A = new Vector(num_rows, f_basis->distributed());
        Vector* noM = new Vector(num_rows, f_basis->distributed());

        Matrix A0(num_basis_vectors - 1, num_basis_vectors - 1, false);
        Matrix V1_last_col(num_basis_vectors - 1, 1, false);
        Matrix tt(num_rows, num_basis_vectors - 1, f_basis->distributed());
        Matrix tt1(num_rows, num_basis_vectors - 1, f_basis->distributed());
        Matrix g1(tt.numRows(), tt.numColumns(), f_basis->distributed());
        Matrix GG(tt1.numRows(), tt1.numColumns(), f_basis->distributed());
        Vector ls_res_first_row(num_basis_vectors - 1, false);
        Vector nV(num_basis_vectors, false);

        int start_idx = 2;
        if (total_num_init_samples > 1)
        {
            start_idx += (total_num_init_samples - 1);
        }
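
        // Main greedy loop: each iteration i selects one additional sample row.
        // While i <= num_basis_vectors only the first i basis columns enter the
        // computation; once i exceeds num_basis_vectors (oversampling), all
        // columns are used. In both regimes the vector A holds a per-row score
        // (on a logarithmic scale), and the unsampled row with the largest
        // score becomes the next sample.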
        for (int i = start_idx; i <= num_samples; i++)
        {
            if (i <= num_basis_vectors)
            {
                A0.setSize(i - 1, i - 1);
                V1_last_col.setSize(i - 1, 1);
                for (int j = 0; j < num_samples_obtained; j++)
                {
                    for (int k = 0; k < i - 1; k++)
                    {
                        A0.item(j, k) = V1.item(j, k);
                    }
                    V1_last_col.item(j, 0) = V1.item(j, i - 1);
                }

                Matrix* atA0 = V1_last_col.transposeMult(A0);
                tt.setSize(num_rows, i - 1);
                tt1.setSize(num_rows, i - 1);

                double ata = 0.0;
                for (int j = 0; j < V1_last_col.numRows(); j++)
                {
                    for (int k = 0; k < V1_last_col.numColumns(); k++)
                    {
                        ata += (V1_last_col.item(j, k) * V1_last_col.item(j, k));
                    }
                }

                Matrix* lhs = A0.transposeMult(A0);
                lhs->inverse();
                Matrix* rhs = NULL;
                if (myid == 0)
                {
                    rhs = new Matrix(num_rows + atA0->numRows(), i - 1, f_basis->distributed());
                    for (int k = 0; k < rhs->numColumns(); k++)
                    {
                        rhs->item(0, k) = atA0->item(0, k);
                    }
                    for (int j = 1; j < rhs->numRows(); j++)
                    {
                        for (int k = 0; k < rhs->numColumns(); k++)
                        {
                            rhs->item(j, k) = Vo->item(j - 1, k);
                        }
                    }
                }
                else
                {
                    rhs = new Matrix(num_rows, i - 1, f_basis->distributed());
                    for (int j = 0; j < rhs->numRows(); j++)
                    {
                        for (int k = 0; k < rhs->numColumns(); k++)
                        {
                            rhs->item(j, k) = Vo->item(j, k);
                        }
                    }
                }

                Matrix* ls_res = rhs->mult(lhs);
                delete lhs;
                delete rhs;

                Matrix* c_T = NULL;
                if (myid == 0)
                {
                    c_T = new Matrix(ls_res->getData() + ls_res->numColumns(),
                                     ls_res->numRows() - 1, ls_res->numColumns(), f_basis->distributed(), true);
                }
                else
                {
                    c_T = new Matrix(ls_res->getData(),
                                     ls_res->numRows(), ls_res->numColumns(), f_basis->distributed(), true);
                }
                Matrix* Vo_first_i_columns = Vo->getFirstNColumns(i - 1);

                Vector* b = new Vector(num_rows, f_basis->distributed());
                for (int j = 0; j < Vo_first_i_columns->numRows(); j++)
                {
                    double tmp = 1.0;
                    for (int k = 0; k < Vo_first_i_columns->numColumns(); k++)
                    {
                        tmp += (Vo_first_i_columns->item(j, k) * c_T->item(j, k));
                    }
                    b->item(j) = tmp;
                }

                delete Vo_first_i_columns;

                for (int j = 0; j < num_rows; j++)
                {
                    for (int zz = 0; zz < i - 1; zz++)
                    {
                        tt.item(j, zz) = Vo->item(j, zz) * Vo->item(j, i - 1);
                    }
                }

                g1.setSize(tt.numRows(), tt.numColumns());
                for (int j = 0; j < g1.numRows(); j++)
                {
                    for (int k = 0; k < g1.numColumns(); k++)
                    {
                        g1.item(j, k) = atA0->item(0, k) + tt.item(j, k);
                    }
                }

                delete atA0;

                Vector* g3 = new Vector(num_rows, f_basis->distributed());
                for (int j = 0; j < c_T->numRows(); j++)
                {
                    double tmp = 0.0;
                    for (int k = 0; k < c_T->numColumns(); k++)
                    {
                        tmp += c_T->item(j, k) * g1.item(j, k);
                    }
                    g3->item(j) = tmp / b->item(j);
                }

                for (int j = 0; j < num_rows; j++)
                {
                    for (int zz = 0; zz < i - 1; zz++)
                    {
                        tt1.item(j, zz) = c_T->item(j, zz) * (Vo->item(j, i - 1) - g3->item(j));
                    }
                }

                delete c_T;
                delete g3;

                ls_res_first_row.setSize(ls_res->numColumns());
                if (myid == 0) {
                    for (int j = 0; j < ls_res->numColumns(); ++j) {
                        c[j] = ls_res->item(0, j);
                    }
                }
                MPI_Bcast(c.data(), ls_res->numColumns(), MPI_DOUBLE,
                          0, MPI_COMM_WORLD);
                for (int j = 0; j < ls_res->numColumns(); ++j) {
                    ls_res_first_row.item(j) = c[j];
                }
                GG.setSize(tt1.numRows(), tt1.numColumns());
                for (int j = 0; j < GG.numRows(); j++)
                {
                    for (int k = 0; k < GG.numColumns(); k++)
                    {
                        GG.item(j, k) = ls_res_first_row.item(k) + tt1.item(j, k);
                    }
                }

                delete ls_res;

                for (int j = 0; j < A->dim(); j++)
                {
                    double tmp = 0.0;
                    for (int k = 0; k < g1.numColumns(); k++)
                    {
                        tmp += g1.item(j, k) * GG.item(j, k);
                    }
                    A->item(j) = std::max(0.0, ata +
                                          (Vo->item(j, i - 1) * Vo->item(j, i - 1)) - tmp);
                }

                nV.setSize(i);
                for (int j = 0; j < i; j++)
                {
                    nV.item(j) = 0.0;
                    for (int k = 0; k < num_samples_obtained; k++)
                    {
                        nV.item(j) += (V1.item(k, j) * V1.item(k, j));
                    }
                }

                for (int j = 0; j < noM->dim(); j++)
                {
                    noM->item(j) = 0.0;
                    for (int k = 0; k < i; k++)
                    {
                        noM->item(j) += std::log(nV.item(k) + (Vo->item(j, k) * Vo->item(j, k)));
                    }
                }

                for (int j = 0; j < A->dim(); j++)
                {
                    A->item(j) = std::log(fabs(A->item(j))) + std::log(b->item(j)) - noM->item(j);
                }

                delete b;
            }
            else
            {
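                // Oversampling stage (i > num_basis_vectors): all basis columns
                // are active. The score for row j is
                //   log(1 + Vo_j * (V1^T V1)^{-1} * Vo_j^T) - noM_j,
                // where noM_j is the column-wise log-normalization computed below.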
                Matrix* curr_V1 = new Matrix(V1.getData(), num_samples_obtained,
                                             num_basis_vectors, false, true);
                Matrix* lhs = curr_V1->transposeMult(curr_V1);
                lhs->inverse();

                delete curr_V1;

                Matrix* ls_res = Vo->mult(lhs);
                delete lhs;

                nV.setSize(num_basis_vectors);
                for (int j = 0; j < num_basis_vectors; j++)
                {
                    nV.item(j) = 0.0;
                    for (int k = 0; k < num_samples_obtained; k++)
                    {
                        nV.item(j) += (V1.item(k, j) * V1.item(k, j));
                    }
                }

                for (int j = 0; j < noM->dim(); j++)
                {
                    noM->item(j) = 0.0;
                    for (int k = 0; k < num_basis_vectors; k++)
                    {
                        noM->item(j) += std::log(nV.item(k) + (Vo->item(j, k) * Vo->item(j, k)));
                    }
                }

                for (int j = 0; j < A->dim(); j++)
                {
                    double tmp = 0.0;
                    for (int k = 0; k < Vo->numColumns(); k++)
                    {
                        tmp += Vo->item(j, k) * ls_res->item(j, k);
                    }
                    A->item(j) = std::log(1 + tmp) - noM->item(j);
                }

                delete ls_res;
            }
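
            // Pick, on this rank, the unsampled local row with the largest
            // score, then reduce across ranks to find the global winner. Rows
            // already sampled on this rank are skipped via
            // proc_f_row_to_tmp_fs_row.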
            f_bv_max_local.row_val = -DBL_MAX;
            f_bv_max_local.proc = myid;
            for (int j = 0; j < num_rows; ++j) {
                if (proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].find(j) ==
                        proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].end())
                {
                    double f_bv_val = A->item(j);
                    if (f_bv_val > f_bv_max_local.row_val) {
                        f_bv_max_local.row_val = f_bv_val;
                        f_bv_max_local.row = j;
                    }
                }
            }
            MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
                          MaxRowType, RowInfoOp, MPI_COMM_WORLD);
            // Now get the newly selected row of the basis.
            if (f_bv_max_global.proc == myid) {
                for (int j = 0; j < num_basis_vectors; ++j) {
                    c[j] = Vo->item(f_bv_max_global.row, j);
                }
            }
            MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
                      f_bv_max_global.proc, MPI_COMM_WORLD);
            // Now add the newly selected row of the basis to tmp_fs.
            for (int j = 0; j < num_basis_vectors; ++j) {
                V1.item(num_samples_obtained, j) = c[j];
            }
            proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
            proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
                num_samples_obtained;
            num_samples_obtained++;
        }

        delete A;
        delete noM;
    }

    // Fill f_sampled_row, and f_sampled_rows_per_proc. Unscramble tmp_fs into
    // f_basis_sampled_inv.
    int idx = 0;
    for (int i = 0; i < num_procs; ++i) {
        set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
        map<int, int>& this_proc_f_row_to_tmp_fs_row =
            proc_f_row_to_tmp_fs_row[i];
        f_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
        for (set<int>::iterator j = this_proc_sampled_f_row.begin();
                j != this_proc_sampled_f_row.end(); ++j) {
            int this_f_row = *j;
            f_sampled_row[idx] = this_f_row;
            int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
            for (int col = 0; col < num_f_basis_cols; ++col) {
                f_basis_sampled_inv.item(idx, col) = V1.item(tmp_fs_row, col);
            }
            ++idx;
        }
    }
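
    // Note: f_sampled_row lists the sampled rows grouped by owning processor
    // (rank 0 first) and, within each rank, in ascending row order; the rows
    // of f_basis_sampled_inv follow the same ordering.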

    CAROM_ASSERT(num_samples == idx);

    // Now invert f_basis_sampled_inv, storing its transpose.
    f_basis_sampled_inv.transposePseudoinverse();

    // Free the MPI_Datatype and MPI_Op.
    MPI_Type_free(&MaxRowType);
    MPI_Op_free(&RowInfoOp);

    if (qr_factorize)
    {
        delete Vo;
    }
    if (num_basis_vectors < f_basis->numColumns())
    {
        delete f_basis_truncated;
    }
}

}  // namespace CAROM
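
// Illustrative usage (a sketch only, not part of libROM): how this routine
// might be called from an MPI program that already holds a basis in a
// CAROM::Matrix. Names such as basis, num_cols, and select_samples are
// hypothetical; the required sizes follow the CAROM_VERIFY checks above
// (num_samples >= num_cols, and basis_sampled_inv is a non-distributed
// num_samples x num_cols matrix).
//
//   #include "S_OPT.h"
//   #include "linalg/Matrix.h"
//   #include <vector>
//
//   void select_samples(const CAROM::Matrix* basis, int num_cols,
//                       int num_samples, int myid, int num_procs)
//   {
//       std::vector<int> sampled_rows(num_samples);
//       std::vector<int> sampled_rows_per_proc(num_procs);
//       CAROM::Matrix basis_sampled_inv(num_samples, num_cols, false);
//       CAROM::S_OPT(basis, num_cols, sampled_rows, sampled_rows_per_proc,
//                    basis_sampled_inv, myid, num_procs, num_samples,
//                    nullptr, false);
//       // On return, sampled_rows holds the selected row indices, grouped by
//       // owning rank (see sampled_rows_per_proc), and basis_sampled_inv
//       // holds the transposed pseudoinverse of the sampled basis.
//   }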