14 #include "linalg/Matrix.h"
15 #include "Utilities.h"
31 int num_f_basis_vectors_used,
32 std::vector<int>& f_sampled_row,
33 std::vector<int>& f_sampled_rows_per_proc,
34 Matrix& f_basis_sampled_inv,
37 const int num_samples_req,
38 std::vector<int>* init_samples,
41 CAROM_VERIFY(num_procs == f_sampled_rows_per_proc.size());
47 MPI_Datatype MaxRowType, oldtypes[2];
49 MPI_Aint offsets[2], extent, lower_bound;
52 oldtypes[0] = MPI_DOUBLE;
59 #if (MPI_VERSION == 1)
60 MPI_Type_extent(MPI_DOUBLE, &extent);
62 MPI_Type_get_extent(MPI_DOUBLE, &lower_bound, &extent);
66 oldtypes[1] = MPI_INT;
73 #if (MPI_VERSION == 1)
74 MPI_Type_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
76 MPI_Type_create_struct(2, blockcounts, offsets, oldtypes, &MaxRowType);
78 MPI_Type_commit(&MaxRowType);
82 MPI_Op_create((MPI_User_function*)RowInfoMax,
true, &RowInfoOp);
85 CAROM_VERIFY(0 < num_f_basis_vectors_used
86 && num_f_basis_vectors_used <= f_basis.
numColumns());
87 const int num_basis_vectors =
88 std::min(num_f_basis_vectors_used, f_basis.
numColumns());
89 const int num_samples = num_samples_req > 0 ? num_samples_req :
91 CAROM_VERIFY(num_basis_vectors <= num_samples
93 CAROM_VERIFY(num_samples == f_sampled_row.size());
94 CAROM_VERIFY(num_samples == f_basis_sampled_inv.
numRows() &&
95 num_basis_vectors == f_basis_sampled_inv.
numColumns());
98 int num_rows = f_basis.
numRows();
102 std::shared_ptr<Matrix> f_basis_truncated_N;
103 const Matrix *f_basis_truncated = &f_basis;
107 f_basis_truncated = f_basis_truncated_N.get();
110 std::shared_ptr<Matrix> Vo_qr;
111 const Matrix *Vo = f_basis_truncated;
119 int num_samples_obtained = 0;
122 std::vector<double> c(num_basis_vectors);
124 vector<set<int> > proc_sampled_f_row(num_procs);
125 vector<map<int, int> > proc_f_row_to_tmp_fs_row(num_procs);
126 int num_f_basis_cols = f_basis_sampled_inv.
numColumns();
131 RowInfo f_bv_max_local, f_bv_max_global;
134 const int num_init_samples = init_samples ? init_samples->size() : 0;
135 int total_num_init_samples = 0;
136 MPI_Allreduce(&num_init_samples, &total_num_init_samples, 1,
137 MPI_INT, MPI_SUM, MPI_COMM_WORLD);
138 CAROM_VERIFY(num_samples >= total_num_init_samples);
140 int init_sample_offset = 0;
141 for (
int i = 0; i < total_num_init_samples; i++)
143 f_bv_max_local.row_val = -DBL_MAX;
144 f_bv_max_local.proc = myid;
145 if (init_sample_offset < num_init_samples)
147 f_bv_max_local.row_val = 1.0;
148 f_bv_max_local.row = (*init_samples)[init_sample_offset];
150 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
151 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
153 if (f_bv_max_global.proc == myid) {
154 for (
int j = 0; j < num_basis_vectors; ++j) {
155 c[j] = Vo->
item(f_bv_max_global.row, j);
157 init_sample_offset++;
159 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
160 f_bv_max_global.proc, MPI_COMM_WORLD);
162 for (
int j = 0; j < num_basis_vectors; ++j) {
163 V1(num_samples_obtained, j) = c[j];
165 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
166 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
167 num_samples_obtained;
168 num_samples_obtained++;
171 if (num_samples_obtained == 0)
173 f_bv_max_local.row_val = -DBL_MAX;
174 f_bv_max_local.proc = myid;
175 for (
int i = 0; i < num_rows; ++i) {
176 double f_bv_val = fabs(Vo->
item(i, 0));
177 if (f_bv_val > f_bv_max_local.row_val) {
178 f_bv_max_local.row_val = f_bv_val;
179 f_bv_max_local.row = i;
182 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
183 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
185 if (f_bv_max_global.proc == myid) {
186 for (
int j = 0; j < num_basis_vectors; ++j) {
187 c[j] = Vo->
item(f_bv_max_global.row, j);
190 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
191 f_bv_max_global.proc, MPI_COMM_WORLD);
193 for (
int j = 0; j < num_basis_vectors; ++j) {
196 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
197 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] = 0;
198 num_samples_obtained++;
201 if (num_samples_obtained < num_samples)
206 Matrix A0(num_basis_vectors - 1, num_basis_vectors - 1,
false);
207 Matrix V1_last_col(num_basis_vectors - 1, 1,
false);
212 Vector ls_res_first_row(num_basis_vectors - 1,
false);
213 Vector nV(num_basis_vectors,
false);
216 if (total_num_init_samples > 1)
218 start_idx += (total_num_init_samples - 1);
220 for (
int i = start_idx; i <= num_samples; i++)
222 if (i <= num_basis_vectors)
226 for (
int j = 0; j < num_samples_obtained; j++)
228 for (
int k = 0; k < i - 1; k++)
232 V1_last_col(j, 0) = V1(j, i - 1);
235 std::unique_ptr<Matrix> atA0 = V1_last_col.
transposeMult(A0);
240 for (
int j = 0; j < V1_last_col.
numRows(); j++)
242 for (
int k = 0; k < V1_last_col.
numColumns(); k++)
244 ata += (V1_last_col(j, k) * V1_last_col(j, k));
256 rhs->
item(0, k) = atA0->item(0, k);
258 for (
int j = 1; j < rhs->
numRows(); j++)
262 rhs->
item(j, k) = Vo->
item(j - 1, k);
269 for (
int j = 0; j < rhs->
numRows(); j++)
278 std::unique_ptr<Matrix> ls_res = rhs->
mult(*lhs);
284 c_T =
new Matrix(ls_res->getData() + ls_res->numColumns(),
285 ls_res->numRows() - 1, ls_res->numColumns(), f_basis.
distributed(),
true);
289 c_T =
new Matrix(ls_res->getData(),
290 ls_res->numRows(), ls_res->numColumns(), f_basis.
distributed(),
true);
295 for (
int j = 0; j < Vo_first_i_columns->numRows(); j++)
298 for (
int k = 0; k < Vo_first_i_columns->numColumns(); k++)
300 tmp += (Vo_first_i_columns->item(j, k) * c_T->
item(j, k));
305 for (
int j = 0; j < num_rows; j++)
307 for (
int zz = 0; zz < i - 1; zz++)
309 tt(j, zz) = Vo->
item(j, zz) * Vo->
item(j, i - 1);
314 for (
int j = 0; j < g1.
numRows(); j++)
318 g1(j, k) = atA0->
item(0, k) + tt(j, k);
323 for (
int j = 0; j < c_T->
numRows(); j++)
328 tmp += c_T->
item(j, k) * g1(j, k);
333 for (
int j = 0; j < num_rows; j++)
335 for (
int zz = 0; zz < i - 1; zz++)
337 tt1(j, zz) = c_T->
item(j, zz) * (Vo->
item(j, i - 1) - g3(j));
343 ls_res_first_row.
setSize(ls_res->numColumns());
345 for (
int j = 0; j < ls_res->numColumns(); ++j) {
346 c[j] = ls_res->item(0, j);
349 MPI_Bcast(c.data(), ls_res->numColumns(), MPI_DOUBLE,
351 for (
int j = 0; j < ls_res->numColumns(); ++j) {
352 ls_res_first_row(j) = c[j];
355 for (
int j = 0; j < GG.
numRows(); j++)
359 GG(j, k) = ls_res_first_row(k) + tt1(j, k);
363 for (
int j = 0; j < A.
dim(); j++)
368 tmp += g1(j, k) * GG(j, k);
370 A(j) = std::max(0.0, ata +
371 (Vo->
item(j, i - 1) * Vo->
item(j, i - 1)) - tmp);
375 for (
int j = 0; j < i; j++)
378 for (
int k = 0; k < num_samples_obtained; k++)
380 nV(j) += (V1(k, j) * V1(k, j));
384 for (
int j = 0; j < noM.
dim(); j++)
387 for (
int k = 0; k < i; k++)
389 noM(j) += std::log(nV(k) + (Vo->
item(j, k) * Vo->
item(j, k)));
393 for (
int j = 0; j < A.
dim(); j++)
395 A(j) = std::log(fabs(A(j))) + std::log(b(j)) - noM(j);
401 num_basis_vectors,
false,
true);
402 std::unique_ptr<Matrix> lhs = curr_V1.
transposeMult(curr_V1);
405 std::unique_ptr<Matrix> ls_res = Vo->
mult(*lhs);
408 for (
int j = 0; j < num_basis_vectors; j++)
411 for (
int k = 0; k < num_samples_obtained; k++)
413 nV(j) += (V1(k, j) * V1(k, j));
417 for (
int j = 0; j < noM.
dim(); j++)
420 for (
int k = 0; k < num_basis_vectors; k++)
422 noM(j) += std::log(nV(k) + (Vo->
item(j, k) * Vo->
item(j, k)));
426 for (
int j = 0; j < A.
dim(); j++)
431 tmp += Vo->
item(j, k) * ls_res->item(j, k);
433 A(j) = std::log(1 + tmp) - noM(j);
437 f_bv_max_local.row_val = -DBL_MAX;
438 f_bv_max_local.proc = myid;
439 for (
int j = 0; j < num_rows; ++j) {
440 if (proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].find(j) ==
441 proc_f_row_to_tmp_fs_row[f_bv_max_local.proc].end())
443 double f_bv_val = A(j);
444 if (f_bv_val > f_bv_max_local.row_val) {
445 f_bv_max_local.row_val = f_bv_val;
446 f_bv_max_local.row = j;
450 MPI_Allreduce(&f_bv_max_local, &f_bv_max_global, 1,
451 MaxRowType, RowInfoOp, MPI_COMM_WORLD);
453 if (f_bv_max_global.proc == myid) {
454 for (
int j = 0; j < num_basis_vectors; ++j) {
455 c[j] = Vo->
item(f_bv_max_global.row, j);
458 MPI_Bcast(c.data(), num_basis_vectors, MPI_DOUBLE,
459 f_bv_max_global.proc, MPI_COMM_WORLD);
461 for (
int j = 0; j < num_basis_vectors; ++j) {
462 V1(num_samples_obtained, j) = c[j];
464 proc_sampled_f_row[f_bv_max_global.proc].insert(f_bv_max_global.row);
465 proc_f_row_to_tmp_fs_row[f_bv_max_global.proc][f_bv_max_global.row] =
466 num_samples_obtained;
467 num_samples_obtained++;
474 for (
int i = 0; i < num_procs; ++i) {
475 set<int>& this_proc_sampled_f_row = proc_sampled_f_row[i];
476 map<int, int>& this_proc_f_row_to_tmp_fs_row =
477 proc_f_row_to_tmp_fs_row[i];
478 f_sampled_rows_per_proc[i] = this_proc_sampled_f_row.size();
479 for (set<int>::iterator j = this_proc_sampled_f_row.begin();
480 j != this_proc_sampled_f_row.end(); ++j) {
482 f_sampled_row[idx] = this_f_row;
483 int tmp_fs_row = this_proc_f_row_to_tmp_fs_row[this_f_row];
484 for (
int col = 0; col < num_f_basis_cols; ++col) {
485 f_basis_sampled_inv(idx, col) = V1(tmp_fs_row, col);
491 CAROM_ASSERT(num_samples == idx);
497 MPI_Type_free(&MaxRowType);
498 MPI_Op_free(&RowInfoOp);
void setSize(int num_rows, int num_cols)
Sets the number of rows and columns of the matrix and reallocates storage if needed....
double * getData() const
Get the matrix data as a pointer.
void transposePseudoinverse()
Computes the transposePseudoinverse of this.
bool distributed() const
Returns true if the Matrix is distributed.
int numColumns() const
Returns the number of columns in the Matrix. This method will return the same value from each process...
std::unique_ptr< Matrix > qr_factorize() const
Computes and returns the Q of the QR factorization of this.
int numDistributedRows() const
Returns the number of rows of the Matrix across all processors.
std::unique_ptr< Matrix > getFirstNColumns(int n) const
Get the first N columns of a matrix.
const double & item(int row, int col) const
Const Matrix member access. Matrix data is stored in row-major format.
int numRows() const
Returns the number of rows of the Matrix on this processor.
std::unique_ptr< Matrix > mult(const Matrix &other) const
Multiplies this Matrix with other and returns the product.
std::unique_ptr< Matrix > transposeMult(const Matrix &other) const
Multiplies the transpose of this Matrix with other and returns the product.
void setSize(int dim)
Sets the length of the vector and reallocates storage if needed. All values are initialized to zero.
int dim() const
Returns the dimension of the Vector on this processor.
void S_OPT(const Matrix &f_basis, int num_f_basis_vectors_used, std::vector< int > &f_sampled_row, std::vector< int > &f_sampled_rows_per_proc, Matrix &f_basis_sampled_inv, const int myid, const int num_procs, const int num_samples_req, std::vector< int > *init_samples, bool qr_factorize)
Computes the S_OPT algorithm on the given basis.