14 #include "linalg/Matrix.h"
15 #include "Utilities.h"
31 int num_f_basis_vectors_used,
32 std::vector<int>& f_sampled_row,
33 std::vector<int>& f_sampled_rows_per_proc,
34 Matrix& f_basis_sampled_inv,
37 const int num_samples_req,
38 std::vector<int>* init_samples,
41 CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
47 MPI_Datatype MaxRowType, oldtypes[2];
49 MPI_Aint offsets[2], extent, lower_bound;
52 oldtypes[0] = MPI_DOUBLE;
59 #if (MPI_VERSION == 1)
60 MPI_Type_extent(MPI_DOUBLE, &extent);
62 MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
66 oldtypes[1] = MPI_INT;
73 #if (MPI_VERSION == 1)
74 MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
76 MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
78 MPI_Type_commit(&MaxRowType);
82 MPI_Op_create((MPI_User_function*)RowInfoMax,
true, &RowInfoOp);
85 CAROM_VERIFY(0 < num_f_basis_vectors_used
86 && num_f_basis_vectors_used <= f_basis->numColumns());
87 const int num_basis_vectors =
88 std::min(num_f_basis_vectors_used, f_basis->
numColumns());
89 const int num_samples = num_samples_req > 0 ? num_samples_req :
91 CAROM_VERIFY(num_basis_vectors <= num_samples
92 && num_samples <= f_basis->numDistributedRows());
93 CAROM_VERIFY(num_samples == f_sampled_row.size());
94 CAROM_VERIFY(num_samples == f_basis_sampled_inv.
numRows() &&
95 num_basis_vectors == f_basis_sampled_inv.
numColumns());
98 int num_rows = f_basis->
numRows();
102 const Matrix* f_basis_truncated = NULL;
103 if (num_basis_vectors < f_basis->numColumns())
109 f_basis_truncated = f_basis;
121 Vo = f_basis_truncated;
124 int num_samples_obtained = 0;
127 std::vector<double> c(num_basis_vectors);
129 vector<set<int> > proc_sampled_f_row(num_procs);
130 vector<map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
131 int num_f_basis_cols = f_basis_sampled_inv.
numColumns();
136 RowInfo f_bv_max_local, f_bv_max_global;
139 const int num_init_samples = init_samples ? init_samples->size() : 0;
140 int total_num_init_samples = 0;
141 MPI_Allreduce(&num_init_samples, &total_num_init_samples, 1,
142 MPI_INT, MPI_SUM, MPI_COMM_WORLD);
143 CAROM_VERIFY(num_samples >= total_num_init_samples);
145 int init_sample_offset = 0;
146 for (
int i = 0; i < total_num_init_samples; i++)
148 f_bv_max_local.row_val = -DBL_MAX;
149 f_bv_max_local.proc = myid;
150 if (init_sample_offset < num_init_samples)
152 f_bv_max_local.row_val = 1.0;
153 f_bv_max_local.row = (*init_samples)[init_sample_offset];
155 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
156 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
158 if (f_bv_max_global.proc == myid) {
159 for (
int j = 0; j < num_basis_vectors; ++j) {
160 c[j] = Vo->
item(f_bv_max_global.row, j);
162 init_sample_offset++;
164 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
165 f_bv_max_global.proc, MPI_COMM_WORLD);
167 for (
int j = 0; j < num_basis_vectors; ++j) {
168 V1.
item(num_samples_obtained, j) = c[j];
170 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
171 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
172 num_samples_obtained;
173 num_samples_obtained++;
176 if (num_samples_obtained == 0)
178 f_bv_max_local.row_val = -DBL_MAX;
179 f_bv_max_local.proc = myid;
180 for (
int i = 0; i < num_rows; ++i) {
181 double f_bv_val = fabs(Vo->
item(i, 0));
182 if (f_bv_val > f_bv_max_local.row_val) {
183 f_bv_max_local.row_val = f_bv_val;
184 f_bv_max_local.row = i;
187 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
188 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
190 if (f_bv_max_global.proc == myid) {
191 for (
int j = 0; j < num_basis_vectors; ++j) {
192 c[j] = Vo->
item(f_bv_max_global.row, j);
195 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
196 f_bv_max_global.proc, MPI_COMM_WORLD);
198 for (
int j = 0; j < num_basis_vectors; ++j) {
199 V1.
item(0, j) = c[j];
201 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
202 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = 0;
203 num_samples_obtained++;
206 if (num_samples_obtained < num_samples)
211 Matrix A0(num_basis_vectors - 1, num_basis_vectors - 1,
false);
212 Matrix V1_last_col(num_basis_vectors - 1, 1,
false);
217 Vector ls_res_first_row(num_basis_vectors - 1,
false);
218 Vector nV(num_basis_vectors,
false);
221 if (total_num_init_samples > 1)
223 start_idx += (total_num_init_samples - 1);
225 for (
int i = start_idx; i <= num_samples; i++)
227 if (i <= num_basis_vectors)
231 for (
int j = 0; j < num_samples_obtained; j++)
233 for (
int k = 0; k < i - 1; k++)
237 V1_last_col.
item(j, 0) = V1.
item(j, i - 1);
245 for (
int j = 0; j < V1_last_col.
numRows(); j++)
247 for (
int k = 0; k < V1_last_col.
numColumns(); k++)
249 ata += (V1_last_col.
item(j, k) * V1_last_col.
item(j, k));
263 for (
int j = 1; j < rhs->
numRows(); j++)
267 rhs->
item(j, k) = Vo->
item(j - 1, k);
274 for (
int j = 0; j < rhs->
numRows(); j++)
301 for (
int j = 0; j < Vo_first_i_columns->
numRows(); j++)
304 for (
int k = 0; k < Vo_first_i_columns->
numColumns(); k++)
306 tmp += (Vo_first_i_columns->
item(j, k) * c_T->
item(j, k));
311 delete Vo_first_i_columns;
313 for (
int j = 0; j < num_rows; j++)
315 for (
int zz = 0; zz < i - 1; zz++)
322 for (
int j = 0; j < g1.
numRows(); j++)
333 for (
int j = 0; j < c_T->
numRows(); j++)
338 tmp += c_T->
item(j, k) * g1.
item(j, k);
343 for (
int j = 0; j < num_rows; j++)
345 for (
int zz = 0; zz < i - 1; zz++)
356 for (
int j = 0; j < ls_res->
numColumns(); ++j) {
357 c[j] = ls_res->
item(0, j);
360 MPI_Bcast(c.data(), ls_res->
numColumns(), MPI_DOUBLE,
362 for (
int j = 0; j < ls_res->
numColumns(); ++j) {
363 ls_res_first_row.
item(j) = c[j];
366 for (
int j = 0; j < GG.
numRows(); j++)
370 GG.
item(j, k) = ls_res_first_row.
item(k) + tt1.
item(j, k);
376 for (
int j = 0; j < A->
dim(); j++)
381 tmp += g1.
item(j, k) * GG.
item(j, k);
383 A->
item(j) = std::max(0.0, ata +
384 (Vo->
item(j, i - 1) * Vo->
item(j, i - 1)) - tmp);
388 for (
int j = 0; j < i; j++)
391 for (
int k = 0; k < num_samples_obtained; k++)
397 for (
int j = 0; j < noM->
dim(); j++)
400 for (
int k = 0; k < i; k++)
406 for (
int j = 0; j < A->
dim(); j++)
408 A->
item(j) = std::log(fabs(A->
item(j))) + std::log(b->
item(j)) - noM->
item(j);
416 num_basis_vectors,
false,
true);
426 for (
int j = 0; j < num_basis_vectors; j++)
429 for (
int k = 0; k < num_samples_obtained; k++)
435 for (
int j = 0; j < noM->
dim(); j++)
438 for (
int k = 0; k < num_basis_vectors; k++)
444 for (
int j = 0; j < A->
dim(); j++)
449 tmp += Vo->
item(j, k) * ls_res->
item(j, k);
451 A->
item(j) = std::log(1 + tmp) - noM->
item(j);
457 f_bv_max_local.row_val = -DBL_MAX;
458 f_bv_max_local.proc = myid;
459 for (
int j = 0; j < num_rows; ++j) {
460 if (proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].find(j) ==
461 proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].end())
463 double f_bv_val = A->
item(j);
464 if (f_bv_val > f_bv_max_local.row_val) {
465 f_bv_max_local.row_val = f_bv_val;
466 f_bv_max_local.row = j;
470 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
471 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
473 if (f_bv_max_global.proc == myid) {
474 for (
int j = 0; j < num_basis_vectors; ++j) {
475 c[j] = Vo->
item(f_bv_max_global.row, j);
478 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
479 f_bv_max_global.proc, MPI_COMM_WORLD);
481 for (
int j = 0; j < num_basis_vectors; ++j) {
482 V1.
item(num_samples_obtained, j) = c[j];
484 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
485 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
486 num_samples_obtained;
487 num_samples_obtained++;
497 for (
int i = 0; i < num_procs; ++i) {
498 set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
499 map<int, int>& this_proc_f_row_to_tmp_fs_row =
500 proc_f_row_to_tmp_fs_row[i];
501 f_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
502 for (set<int>::iterator j = this_proc_sampled_f_row.begin();
503 j != this_proc_sampled_f_row.end(); ++j) {
505 f_sampled_row[idx] = this_f_row;
506 int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
507 for (
int col = 0; col < num_f_basis_cols; ++col) {
508 f_basis_sampled_inv.
item(idx, col) = V1.
item(tmp_fs_row, col);
514 CAROM_ASSERT(num_samples == idx);
520 MPI_Type_free(&MaxRowType);
521 MPI_Op_free(&RowInfoOp);
527 if (num_basis_vectors < f_basis->numColumns())
529 delete f_basis_truncated;
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
Matrix * getFirstNColumns(int n) const
Get the first N columns of a matrix.
double * getData() const
Get the matrix data as a pointer.
Matrix * inverse() const
Computes and returns the inverse of this.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
Matrix * transposeMult(const Matrix &other) const
Multiplies the transpose of this Matrix with other and returns the product, reference version.
Matrix * mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product, reference version.
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
Matrix * qr_factorize() const
Computes and returns the Q of the QR factorization of this.
void setSize(int dim)
Sets the length of the vector and reallocates storage if needed. All values are initialized to zero.
const double & item(int i) const
Const Vector member access.
int dim() const
Returns the dimension of the Vector on this processor.
void S_OPT(const Matrix *f_basis, int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req, std::vector< int > *init_samples, bool qr_factorize)
Computes the S_OPT algorithm on the given basis.