Révision | 45088b740ac2697d8a928e94681bdaea852a05b3 (tree) |
---|---|
l'heure | 2013-01-11 21:00:51 |
Auteur | Katsuhiko Nishimra <ktns.87@gmai...> |
Commiter | Katsuhiko Nishimra |
Merge trunk into branch gdiis. #28915
git-svn-id: https://svn.sourceforge.jp/svnroot/molds/branches/gdiis@1235 1136aad2-a195-0410-b898-f5ea1d11b9d8
@@ -22,7 +22,7 @@ LIBS32 = -lmkl_intel $(LIBSBASE) | ||
22 | 22 | LIBS64 = -lmkl_intel_ilp64 $(LIBSBASE) |
23 | 23 | CFLAGSBASE = -O0 -openmp -openmp-report2 -DMKL_INT=intptr_t |
24 | 24 | CFLAGS32 = $(CFLAGSBASE) |
25 | -CFLAGS64 = $(CFLAGSBASE) -DMKL_INT64 | |
25 | +CFLAGS64 = $(CFLAGSBASE) -DMKL_ILP64 | |
26 | 26 | BOOST_TOP_DIR = /usr/local/boost/ |
27 | 27 | BOOST_INC_DIR = $(BOOST_TOP_DIR)include/ |
28 | 28 | EXENAME = MolDS.out |
@@ -28,9 +28,9 @@ OPENBLAS_LIBS = -lopenblas | ||
28 | 28 | EXENAME = MolDS.out |
29 | 29 | DEPFILE = obj/objfile.dep |
30 | 30 | |
31 | -ALL_CPP_FILES = Main.cpp base/MolDSException.cpp wrappers/Blas_GNU.cpp wrappers/Lapack_GNU.cpp base/Utilities.cpp base/Enums.cpp base/MathUtilities.cpp base/MallocerFreer.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp optimization/GDIIS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp | |
31 | +ALL_CPP_FILES = Main.cpp base/MolDSException.cpp wrappers/Blas.cpp wrappers/Lapack.cpp base/Utilities.cpp base/Enums.cpp base/MathUtilities.cpp base/MallocerFreer.cpp base/EularAngle.cpp base/Parameters.cpp base/atoms/Atom.cpp base/atoms/Hatom.cpp base/atoms/Liatom.cpp base/atoms/Catom.cpp base/atoms/Natom.cpp base/atoms/Oatom.cpp base/atoms/Satom.cpp base/factories/AtomFactory.cpp base/Molecule.cpp base/InputParser.cpp base/GTOExpansionSTO.cpp base/RealSphericalHarmonicsIndex.cpp base/loggers/MOLogger.cpp base/loggers/DensityLogger.cpp base/loggers/HoleDensityLogger.cpp base/loggers/ParticleDensityLogger.cpp cndo/Cndo2.cpp indo/Indo.cpp zindo/ZindoS.cpp mndo/Mndo.cpp am1/Am1.cpp am1/Am1D.cpp pm3/Pm3.cpp pm3/Pm3D.cpp pm3/Pm3Pddg.cpp base/factories/ElectronicStructureFactory.cpp md/MD.cpp mc/MC.cpp rpmd/RPMD.cpp nasco/NASCO.cpp optimization/Optimizer.cpp optimization/ConjugateGradient.cpp optimization/SteepestDescent.cpp optimization/BFGS.cpp optimization/GDIIS.cpp base/factories/OptimizerFactory.cpp base/MolDS.cpp | |
32 | 32 | ALL_HEAD_FILES = base/PrintController.h base/MolDSException.h base/Uncopyable.h wrappers/Blas.h wrappers/Lapack.h base/Utilities.h base/Enums.h base/MathUtilities.h base/MallocerFreer.h base/EularAngle.h base/Parameters.h base/atoms/Atom.h base/atoms/Hatom.h base/atoms/Liatom.h base/atoms/Catom.h base/atoms/Natom.h base/atoms/Oatom.h base/atoms/Satom.h base/factories/AtomFactory.h base/Molecule.h base/InputParser.h base/GTOExpansionSTO.h base/RealSphericalHarmonicsIndex.h base/loggers/MOLogger.h base/loggers/DensityLogger.h base/loggers/HoleDensityLogger.h base/loggers/ParticleDensityLogger.h base/ElectronicStructure.h cndo/Cndo2.h cndo/ReducedOverlapAOsParameters.h indo/Indo.h zindo/ZindoS.h mndo/Mndo.h am1/Am1.h am1/Am1D.h pm3/Pm3.h pm3/Pm3D.h pm3/Pm3Pddg.h base/factories/ElectronicStructureFactory.h md/MD.h mc/MC.h rpmd/RPMD.h nasco/NASCO.h optimization/Optimizer.h optimization/ConjugateGradient.h optimization/SteepestDescent.h optimization/BFGS.h optimization/GDIIS.h base/factories/OptimizerFactory.h base/MolDS.h |
33 | -ALL_OBJ_FILES = obj/Main.o obj/MolDSException.o obj/Blas_GNU.o obj/Lapack_GNU.o obj/Utilities.o obj/Enums.o obj/MathUtilities.o obj/MallocerFreer.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/GDIIS.o obj/OptimizerFactory.o obj/MolDS.o | |
33 | +ALL_OBJ_FILES = obj/Main.o obj/MolDSException.o obj/Blas.o obj/Lapack.o obj/Utilities.o obj/Enums.o obj/MathUtilities.o obj/MallocerFreer.o obj/EularAngle.o obj/Parameters.o obj/Atom.o obj/Hatom.o obj/Liatom.o obj/Catom.o obj/Natom.o obj/Oatom.o obj/Satom.o obj/AtomFactory.o obj/Molecule.o obj/InputParser.o obj/GTOExpansionSTO.o obj/RealSphericalHarmonicsIndex.o obj/MOLogger.o obj/DensityLogger.o obj/HoleDensityLogger.o obj/ParticleDensityLogger.o obj/Cndo2.o obj/Indo.o obj/ZindoS.o obj/Mndo.o obj/Am1.o obj/Am1D.o obj/Pm3.o obj/Pm3D.o obj/Pm3Pddg.o obj/ElectronicStructureFactory.o obj/MD.o obj/MC.o obj/RPMD.o obj/NASCO.o obj/Optimizer.o obj/ConjugateGradient.o obj/SteepestDescent.o obj/BFGS.o obj/GDIIS.o obj/OptimizerFactory.o obj/MolDS.o | |
34 | 34 | |
35 | 35 | $(EXENAME): $(DEPFILE) $(ALL_OBJ_FILES) |
36 | 36 | $(CC) -o $@ -Wl,-rpath=$(OPENBLAS_LIB_DIR) $(ALL_OBJ_FILES) -L$(OPENBLAS_LIB_DIR) $(LIBS) $(OPENBLAS_LIBS) |
@@ -25,7 +25,11 @@ | ||
25 | 25 | #include<string> |
26 | 26 | #include<stdexcept> |
27 | 27 | #include<boost/format.hpp> |
28 | +#ifdef __INTEL_COMPILER | |
28 | 29 | #include"mkl.h" |
30 | +#else | |
31 | +#include"cblas.h" | |
32 | +#endif | |
29 | 33 | #include"../base/PrintController.h" |
30 | 34 | #include"../base/MolDSException.h" |
31 | 35 | #include"../base/Uncopyable.h" |
@@ -75,7 +79,12 @@ void Blas::Dcopy(molds_blas_int n, | ||
75 | 79 | void Blas::Dcopy(molds_blas_int n, |
76 | 80 | double const* vectorX, molds_blas_int incrementX, |
77 | 81 | double* vectorY, molds_blas_int incrementY) const{ |
82 | +#ifdef __INTEL_COMPILER | |
78 | 83 | dcopy(&n, vectorX, &incrementX, vectorY, &incrementY); |
84 | +#else | |
85 | + double* x = const_cast<double*>(&vectorX[0]); | |
86 | + cblas_dcopy(n, x, incrementX, vectorY, incrementY); | |
87 | +#endif | |
79 | 88 | } |
80 | 89 | |
81 | 90 | // vectorY = alpha*vectorX + vectorY |
@@ -95,7 +104,12 @@ void Blas::Daxpy(molds_blas_int n, double alpha, | ||
95 | 104 | void Blas::Daxpy(molds_blas_int n, double alpha, |
96 | 105 | double const* vectorX, molds_blas_int incrementX, |
97 | 106 | double* vectorY, molds_blas_int incrementY) const{ |
107 | +#ifdef __INTEL_COMPILER | |
98 | 108 | daxpy(&n, &alpha, vectorX, &incrementX, vectorY, &incrementY); |
109 | +#else | |
110 | + double* x = const_cast<double*>(&vectorX[0]); | |
111 | + cblas_daxpy(n, alpha, x, incrementX, vectorY, incrementY); | |
112 | +#endif | |
99 | 113 | } |
100 | 114 | |
101 | 115 | // returns vectorX^T*vectorY |
@@ -115,7 +129,13 @@ double Blas::Ddot(molds_blas_int n, | ||
115 | 129 | double Blas::Ddot(molds_blas_int n, |
116 | 130 | double const* vectorX, molds_blas_int incrementX, |
117 | 131 | double const* vectorY, molds_blas_int incrementY)const{ |
132 | +#ifdef __INTEL_COMPILER | |
118 | 133 | return ddot(&n, vectorX, &incrementX, vectorY, &incrementY); |
134 | +#else | |
135 | + double* x=const_cast<double*>(vectorX), | |
136 | + * y=const_cast<double*>(vectorY); | |
137 | + return cblas_ddot(n, x, incrementX, y, incrementY); | |
138 | +#endif | |
119 | 139 | } |
120 | 140 | |
121 | 141 | // vectorY = matrixA*vectorX |
@@ -147,6 +167,7 @@ void Blas::Dgemv(bool isColumnMajorMatrixA, | ||
147 | 167 | double beta, |
148 | 168 | double* vectorY, |
149 | 169 | molds_blas_int incrementY) const{ |
170 | +#ifdef __INTEL_COMPILER | |
150 | 171 | double const* a = &matrixA[0][0]; |
151 | 172 | char transA; |
152 | 173 | if(isColumnMajorMatrixA){ |
@@ -158,6 +179,20 @@ void Blas::Dgemv(bool isColumnMajorMatrixA, | ||
158 | 179 | } |
159 | 180 | molds_blas_int lda = m; |
160 | 181 | dgemv(&transA, &m, &n, &alpha, a, &lda, vectorX, &incrementX, &beta, vectorY, &incrementY); |
182 | +#else | |
183 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
184 | + double* x = const_cast<double*>(&vectorX[0]); | |
185 | + CBLAS_TRANSPOSE transA; | |
186 | + if(isColumnMajorMatrixA){ | |
187 | + transA = CblasNoTrans; | |
188 | + } | |
189 | + else{ | |
190 | + transA = CblasTrans; | |
191 | + swap(m,n); | |
192 | + } | |
193 | + int lda = m; | |
194 | + cblas_dgemv(CblasColMajor, transA, m, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
195 | +#endif | |
161 | 196 | } |
162 | 197 | |
163 | 198 | // vectorY = matrixA*vectorX |
@@ -183,10 +218,18 @@ void Blas::Dsymv(molds_blas_int n, double alpha, | ||
183 | 218 | double const* vectorX, molds_blas_int incrementX, |
184 | 219 | double beta, |
185 | 220 | double* vectorY, molds_blas_int incrementY) const{ |
221 | +#ifdef __INTEL_COMPILER | |
186 | 222 | double const* a = &matrixA[0][0]; |
187 | 223 | char uploA='L'; |
188 | 224 | molds_blas_int lda = n; |
189 | 225 | dsymv(&uploA, &n, &alpha, a, &lda, vectorX, &incrementX, &beta, vectorY, &incrementY); |
226 | +#else | |
227 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
228 | + double* x = const_cast<double*>(&vectorX[0]); | |
229 | + CBLAS_UPLO uploA=CblasUpper; | |
230 | + int lda = n; | |
231 | + cblas_dsymv(CblasRowMajor, uploA, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
232 | +#endif | |
190 | 233 | } |
191 | 234 | |
192 | 235 | // matrixA = alpha*vectorX*vectorX^T + matrixA |
@@ -203,9 +246,16 @@ void Blas::Dsyr(molds_blas_int n, double alpha, | ||
203 | 246 | double const* vectorX, molds_blas_int incrementX, |
204 | 247 | double ** matrixA)const{ |
205 | 248 | double* a = &matrixA[0][0]; |
249 | +#ifdef __INTEL_COMPILER | |
206 | 250 | char uploA='L'; |
207 | 251 | molds_blas_int lda = n; |
208 | 252 | dsyr(&uploA, &n, &alpha, vectorX, &incrementX, a, &lda); |
253 | +#else | |
254 | + double* x = const_cast<double*>(&vectorX[0]); | |
255 | + CBLAS_UPLO uploA=CblasUpper; | |
256 | + int lda = n; | |
257 | + cblas_dsyr(CblasRowMajor, uploA, n, alpha, x, incrementX, a, lda); | |
258 | +#endif | |
209 | 259 | #pragma omp parallel for schedule(auto) |
210 | 260 | for(molds_blas_int i=0;i<n;i++){ |
211 | 261 | for(molds_blas_int j=i+1;j<n;j++){ |
@@ -241,6 +291,7 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
241 | 291 | double const* const* matrixB, |
242 | 292 | double beta, |
243 | 293 | double** matrixC) const{ |
294 | +#ifdef __INTEL_COMPILER | |
244 | 295 | double const* a = &matrixA[0][0]; |
245 | 296 | double const* b = &matrixB[0][0]; |
246 | 297 | double* c = &matrixC[0][0]; |
@@ -266,9 +317,40 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
266 | 317 | transB = 'T'; //kb=k |
267 | 318 | ldb = n; |
268 | 319 | } |
320 | +#else | |
321 | + double* a = const_cast<double*>(&matrixA[0][0]); | |
322 | + double* b = const_cast<double*>(&matrixB[0][0]); | |
323 | + double* c = &matrixC[0][0]; | |
324 | + | |
325 | + int lda; | |
326 | + CBLAS_TRANSPOSE transA; | |
327 | + if(isColumnMajorMatrixA){ | |
328 | + transA = CblasNoTrans; | |
329 | + lda = m; | |
330 | + } | |
331 | + else{ | |
332 | + transA = CblasTrans; | |
333 | + lda = k; | |
334 | + } | |
335 | + | |
336 | + int ldb; | |
337 | + CBLAS_TRANSPOSE transB; | |
338 | + if(isColumnMajorMatrixB){ | |
339 | + transB = CblasNoTrans; | |
340 | + ldb = k; | |
341 | + } | |
342 | + else{ | |
343 | + transB = CblasTrans; | |
344 | + ldb = n; | |
345 | + } | |
346 | +#endif | |
269 | 347 | |
270 | 348 | double* tmpC; |
349 | +#ifdef __INTEL_COMPILER | |
271 | 350 | tmpC = (double*)mkl_malloc( sizeof(double)*m*n, 16 ); |
351 | +#else | |
352 | + tmpC = (double*)malloc( sizeof(double)*m*n); | |
353 | +#endif | |
272 | 354 | for(molds_blas_int i=0; i<m; i++){ |
273 | 355 | for(molds_blas_int j=0; j<n; j++){ |
274 | 356 | tmpC[i+j*m] = matrixC[i][j]; |
@@ -276,13 +358,21 @@ void Blas::Dgemm(bool isColumnMajorMatrixA, | ||
276 | 358 | } |
277 | 359 | molds_blas_int ldc = m; |
278 | 360 | //call blas |
361 | +#ifdef __INTEL_COMPILER | |
279 | 362 | dgemm(&transA, &transB, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, tmpC, &ldc); |
363 | +#else | |
364 | + cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, tmpC, ldc); | |
365 | +#endif | |
280 | 366 | for(molds_blas_int i=0; i<m; i++){ |
281 | 367 | for(molds_blas_int j=0; j<n; j++){ |
282 | 368 | matrixC[i][j] = tmpC[i+j*m]; |
283 | 369 | } |
284 | 370 | } |
371 | +#ifdef __INTEL_COMPILER | |
285 | 372 | mkl_free(tmpC); |
373 | +#else | |
374 | + free(tmpC); | |
375 | +#endif | |
286 | 376 | } |
287 | 377 | |
288 | 378 | } |
@@ -1,296 +0,0 @@ | ||
1 | -//************************************************************************// | |
2 | -// Copyright (C) 2011-2012 Mikiya Fujii // | |
3 | -// Copyright (C) 2012-2012 Katsuhiko Nishimra // | |
4 | -// // | |
5 | -// This file is part of MolDS. // | |
6 | -// // | |
7 | -// MolDS is free software: you can redistribute it and/or modify // | |
8 | -// it under the terms of the GNU General Public License as published by // | |
9 | -// the Free Software Foundation, either version 3 of the License, or // | |
10 | -// (at your option) any later version. // | |
11 | -// // | |
12 | -// MolDS is distributed in the hope that it will be useful, // | |
13 | -// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
14 | -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
15 | -// GNU General Public License for more details. // | |
16 | -// // | |
17 | -// You should have received a copy of the GNU General Public License // | |
18 | -// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
19 | -//************************************************************************// | |
20 | -#include<stdio.h> | |
21 | -#include<stdlib.h> | |
22 | -#include<iostream> | |
23 | -#include<sstream> | |
24 | -#include<math.h> | |
25 | -#include<string> | |
26 | -#include<stdexcept> | |
27 | -#include<boost/format.hpp> | |
28 | -#include"cblas.h" | |
29 | -#include"../base/PrintController.h" | |
30 | -#include"../base/MolDSException.h" | |
31 | -#include"../base/Uncopyable.h" | |
32 | -#include"Blas.h" | |
33 | -using namespace std; | |
34 | -using namespace MolDS_base; | |
35 | - | |
36 | -namespace MolDS_wrappers{ | |
37 | -Blas* Blas::blas = NULL; | |
38 | - | |
39 | -Blas::Blas(){ | |
40 | -} | |
41 | - | |
42 | -Blas::~Blas(){ | |
43 | -} | |
44 | - | |
45 | -Blas* Blas::GetInstance(){ | |
46 | - if(blas == NULL){ | |
47 | - blas = new Blas(); | |
48 | - //this->OutputLog("Blas created.\n\n"); | |
49 | - } | |
50 | - return blas; | |
51 | -} | |
52 | - | |
53 | -void Blas::DeleteInstance(){ | |
54 | - if(blas != NULL){ | |
55 | - delete blas; | |
56 | - //this->OutputLog("Blas deleted\n\n"); | |
57 | - } | |
58 | - blas = NULL; | |
59 | -} | |
60 | - | |
61 | -// vectorY = vectorX | |
62 | -// vectorX: n-vector | |
63 | -// vectorY: n-vector | |
64 | -void Blas::Dcopy(molds_blas_int n, | |
65 | - double const* vectorX, | |
66 | - double * vectorY)const{ | |
67 | - molds_blas_int incrementX=1; | |
68 | - molds_blas_int incrementY=1; | |
69 | - this->Dcopy(n, vectorX, incrementX, vectorY, incrementY); | |
70 | -} | |
71 | - | |
72 | -// vectorY = vectorX | |
73 | -// vectorX: n-vector | |
74 | -// vectorY: n-vector | |
75 | -void Blas::Dcopy(molds_blas_int n, | |
76 | - double const* vectorX, molds_blas_int incrementX, | |
77 | - double* vectorY, molds_blas_int incrementY) const{ | |
78 | - double* x = const_cast<double*>(&vectorX[0]); | |
79 | - cblas_dcopy(n, x, incrementX, vectorY, incrementY); | |
80 | -} | |
81 | - | |
82 | -// vectorY = alpha*vectorX + vectorY | |
83 | -// vectorX: n-vector | |
84 | -// vectorY: n-vector | |
85 | -void Blas::Daxpy(molds_blas_int n, double alpha, | |
86 | - double const* vectorX, | |
87 | - double* vectorY) const{ | |
88 | - molds_blas_int incrementX=1; | |
89 | - molds_blas_int incrementY=1; | |
90 | - this->Daxpy(n, alpha, vectorX, incrementX, vectorY, incrementY); | |
91 | -} | |
92 | - | |
93 | -// vectorY = alpha*vectorX + vectorY | |
94 | -// vectorX: n-vector | |
95 | -// vectorY: n-vector | |
96 | -void Blas::Daxpy(molds_blas_int n, double alpha, | |
97 | - double const* vectorX, molds_blas_int incrementX, | |
98 | - double* vectorY, molds_blas_int incrementY) const{ | |
99 | - double* x = const_cast<double*>(&vectorX[0]); | |
100 | - cblas_daxpy(n, alpha, x, incrementX, vectorY, incrementY); | |
101 | -} | |
102 | - | |
103 | -// returns vectorX^T*vectorY | |
104 | -// vectorX: n-vector | |
105 | -// vectorY: n-vector | |
106 | -double Blas::Ddot(molds_blas_int n, | |
107 | - double const* vectorX, | |
108 | - double const* vectorY) const{ | |
109 | - molds_blas_int incrementX=1; | |
110 | - molds_blas_int incrementY=1; | |
111 | - return this->Ddot(n, vectorX, incrementX, vectorY, incrementY); | |
112 | -} | |
113 | - | |
114 | -// returns vectorX^T*vectorY | |
115 | -// vectorX: n-vector | |
116 | -// vectorY: n-vector | |
117 | -double Blas::Ddot(molds_blas_int n, | |
118 | - double const* vectorX, molds_blas_int incrementX, | |
119 | - double const* vectorY, molds_blas_int incrementY)const{ | |
120 | - double* x=const_cast<double*>(vectorX), | |
121 | - * y=const_cast<double*>(vectorY); | |
122 | - return cblas_ddot(n, x, incrementX, y, incrementY); | |
123 | -} | |
124 | - | |
125 | -// vectorY = matrixA*vectorX | |
126 | -// matrixA: m*n-matrix (matrixA[m][n] in row-major (C/C++ style)) | |
127 | -// vectorX: n-vector | |
128 | -// vectorY: m-vector | |
129 | -void Blas::Dgemv(molds_blas_int m, molds_blas_int n, | |
130 | - double const* const* matrixA, | |
131 | - double const* vectorX, | |
132 | - double* vectorY) const{ | |
133 | - bool isColumnMajorMatrixA = false; // because, in general, C/C++ style is row-major. | |
134 | - molds_blas_int incrementX=1; | |
135 | - molds_blas_int incrementY=1; | |
136 | - double alpha =1.0; | |
137 | - double beta =0.0; | |
138 | - this->Dgemv(isColumnMajorMatrixA, m, n, alpha, matrixA, vectorX, incrementX, beta, vectorY, incrementY); | |
139 | -} | |
140 | - | |
141 | -// vectorY = alpha*matrixA*vectorX + beta*vectorY | |
142 | -// matrixA: m*n-matrix | |
143 | -// vectorX: n-vector | |
144 | -// vectorY: m-vector | |
145 | -void Blas::Dgemv(bool isColumnMajorMatrixA, | |
146 | - molds_blas_int m, molds_blas_int n, | |
147 | - double alpha, | |
148 | - double const* const* matrixA, | |
149 | - double const* vectorX, | |
150 | - molds_blas_int incrementX, | |
151 | - double beta, | |
152 | - double* vectorY, | |
153 | - molds_blas_int incrementY) const{ | |
154 | - double* a = const_cast<double*>(&matrixA[0][0]); | |
155 | - double* x = const_cast<double*>(&vectorX[0]); | |
156 | - CBLAS_TRANSPOSE transA; | |
157 | - if(isColumnMajorMatrixA){ | |
158 | - transA = CblasNoTrans; | |
159 | - } | |
160 | - else{ | |
161 | - transA = CblasTrans; | |
162 | - swap(m,n); | |
163 | - } | |
164 | - molds_blas_int lda = m; | |
165 | - cblas_dgemv(CblasColMajor, transA, m, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
166 | - | |
167 | -} | |
168 | - | |
169 | -// vectorY = matrixA*vectorX | |
170 | -// matrixA: n*n-matrix,symmetric (Use the upper triangular part) | |
171 | -// vectorX: n-vector | |
172 | -// vectorY: n-vector | |
173 | -void Blas::Dsymv(molds_blas_int n, | |
174 | - double const* const* matrixA, | |
175 | - double const* vectorX, | |
176 | - double* vectorY) const{ | |
177 | - bool isColumnMajorMatrixA = false; // because, in general, C/C++ style is row-major. | |
178 | - molds_blas_int incrementX=1, incrementY=1; | |
179 | - double alpha=1.0, beta=0.0; | |
180 | - this->Dsymv(n, alpha, matrixA, vectorX, incrementX, beta, vectorY, incrementY); | |
181 | -} | |
182 | - | |
183 | -// vectorY = alpha*matrixA*vectorX + beta*vectorY | |
184 | -// matrixA: n*n-matrix,symmetric (Use the upper triangular part) | |
185 | -// vectorX: n-vector | |
186 | -// vectorY: n-vector | |
187 | -void Blas::Dsymv(molds_blas_int n, double alpha, | |
188 | - double const* const* matrixA, | |
189 | - double const* vectorX, molds_blas_int incrementX, | |
190 | - double beta, | |
191 | - double* vectorY, molds_blas_int incrementY) const{ | |
192 | - double* a = const_cast<double*>(&matrixA[0][0]); | |
193 | - double* x = const_cast<double*>(&vectorX[0]); | |
194 | - CBLAS_UPLO uploA=CblasUpper; | |
195 | - molds_blas_int lda = n; | |
196 | - cblas_dsymv(CblasRowMajor, uploA, n, alpha, a, lda, x, incrementX, beta, vectorY, incrementY); | |
197 | -} | |
198 | - | |
199 | -// matrixA = alpha*vectorX*vectorX^T + matrixA | |
200 | -// matrixA: n*n-matrix,symmetric (Use the upper triangular part, and copy it to the lower part.) | |
201 | -// vectorX: n-matrix | |
202 | -void Blas::Dsyr(molds_blas_int n, double alpha, | |
203 | - double const* vectorX, | |
204 | - double ** matrixA)const{ | |
205 | - molds_blas_int incrementX=1; | |
206 | - this->Dsyr(n, alpha, vectorX, incrementX, matrixA); | |
207 | -} | |
208 | - | |
209 | -void Blas::Dsyr(molds_blas_int n, double alpha, | |
210 | - double const* vectorX, molds_blas_int incrementX, | |
211 | - double ** matrixA)const{ | |
212 | - double* a = &matrixA[0][0]; | |
213 | - double* x = const_cast<double*>(&vectorX[0]); | |
214 | - CBLAS_UPLO uploA=CblasUpper; | |
215 | - molds_blas_int lda = n; | |
216 | - cblas_dsyr(CblasRowMajor, uploA, n, alpha, x, incrementX, a, lda); | |
217 | -#pragma omp parallel for schedule(auto) | |
218 | - for(molds_blas_int i=0;i<n;i++){ | |
219 | - for(molds_blas_int j=i+1;j<n;j++){ | |
220 | - matrixA[j][i] = matrixA[i][j]; | |
221 | - } | |
222 | - } | |
223 | -} | |
224 | - | |
225 | -// matrixC = matrixA*matrixB | |
226 | -// matrixA: m*k-matrix (matrixA[m][k] in row-major (C/C++ style)) | |
227 | -// matrixB: k*n-matrix (matrixB[k][n] in row-major (C/C++ style)) | |
228 | -// matrixC: m*n-matrix (matrixC[m][n] in row-major (C/C++ style)) | |
229 | -void Blas::Dgemm(molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
230 | - double const* const* matrixA, | |
231 | - double const* const* matrixB, | |
232 | - double** matrixC) const{ | |
233 | - bool isColumnMajorMatrixA = false; // because, in general, C/C++ style is row-major. | |
234 | - bool isColumnMajorMatrixB = false; // because, in general, C/C++ style is row-major. | |
235 | - double alpha=1.0; | |
236 | - double beta =0.0; | |
237 | - this->Dgemm(isColumnMajorMatrixA, isColumnMajorMatrixB, m, n, k, alpha, matrixA, matrixB, beta, matrixC); | |
238 | -} | |
239 | - | |
240 | -// matrixC = alpha*matrixA*matrixB + beta*matrixC | |
241 | -// matrixA: m*k-matrix | |
242 | -// matrixB: k*n-matrix | |
243 | -// matrixC: m*n-matrix (matrixC[m][n] in row-major (C/C++ style)) | |
244 | -void Blas::Dgemm(bool isColumnMajorMatrixA, | |
245 | - bool isColumnMajorMatrixB, | |
246 | - molds_blas_int m, molds_blas_int n, molds_blas_int k, | |
247 | - double alpha, | |
248 | - double const* const* matrixA, | |
249 | - double const* const* matrixB, | |
250 | - double beta, | |
251 | - double** matrixC) const{ | |
252 | - double* a = const_cast<double*>(&matrixA[0][0]); | |
253 | - double* b = const_cast<double*>(&matrixB[0][0]); | |
254 | - double* c = &matrixC[0][0]; | |
255 | - | |
256 | - molds_blas_int lda; | |
257 | - CBLAS_TRANSPOSE transA; | |
258 | - if(isColumnMajorMatrixA){ | |
259 | - transA = CblasNoTrans; | |
260 | - lda = m; | |
261 | - } | |
262 | - else{ | |
263 | - transA = CblasTrans; | |
264 | - lda = k; | |
265 | - } | |
266 | - | |
267 | - molds_blas_int ldb; | |
268 | - CBLAS_TRANSPOSE transB; | |
269 | - if(isColumnMajorMatrixB){ | |
270 | - transB = CblasNoTrans; | |
271 | - ldb = k; | |
272 | - } | |
273 | - else{ | |
274 | - transB = CblasTrans; | |
275 | - ldb = n; | |
276 | - } | |
277 | - | |
278 | - double* tmpC; | |
279 | - tmpC = (double*)malloc( sizeof(double)*m*n); | |
280 | - for(molds_blas_int i=0; i<m; i++){ | |
281 | - for(molds_blas_int j=0; j<n; j++){ | |
282 | - tmpC[i+j*m] = matrixC[i][j]; | |
283 | - } | |
284 | - } | |
285 | - molds_blas_int ldc = m; | |
286 | - //call blas | |
287 | - cblas_dgemm(CblasColMajor, transA, transB, m, n, k, alpha, a, lda, b, ldb, beta, tmpC, ldc); | |
288 | - for(molds_blas_int i=0; i<m; i++){ | |
289 | - for(molds_blas_int j=0; j<n; j++){ | |
290 | - matrixC[i][j] = tmpC[i+j*m]; | |
291 | - } | |
292 | - } | |
293 | - free(tmpC); | |
294 | -} | |
295 | - | |
296 | -} |
@@ -24,11 +24,30 @@ | ||
24 | 24 | #include<string> |
25 | 25 | #include<stdexcept> |
26 | 26 | #include<boost/format.hpp> |
27 | +#ifdef __INTEL_COMPILER | |
27 | 28 | #include"mkl.h" |
29 | +#else | |
30 | +#if ( __WORDSIZE == 32 ) | |
31 | +#else | |
32 | +#define HAVE_LAPACK_CONFIG_H | |
33 | +#define LAPACK_ILP64 | |
34 | +#endif | |
35 | +#include"lapacke.h" | |
36 | +#endif | |
28 | 37 | #include"../base/PrintController.h" |
29 | 38 | #include"../base/MolDSException.h" |
30 | 39 | #include"../base/Uncopyable.h" |
31 | 40 | #include"Lapack.h" |
41 | + | |
42 | +#ifdef __INTEL_COMPILER | |
43 | +#define MOLDS_LAPACK_malloc(a,b) mkl_malloc(a,b) | |
44 | +#define MOLDS_LAPACK_free(a) mkl_free(a) | |
45 | +#else | |
46 | +#define MOLDS_LAPACK_malloc(a,b) malloc(a) | |
47 | +#define MOLDS_LAPACK_free(a) free(a) | |
48 | +#endif | |
49 | + | |
50 | + | |
32 | 51 | using namespace std; |
33 | 52 | using namespace MolDS_base; |
34 | 53 |
@@ -121,10 +140,10 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
121 | 140 | } |
122 | 141 | |
123 | 142 | // malloc |
124 | - work = (double*)mkl_malloc( sizeof(double)*lwork, 16 ); | |
125 | - iwork = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
126 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
127 | - tempEigenValues = (double*)mkl_malloc( sizeof(double)*size, 16 ); | |
143 | + work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
144 | + iwork = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*liwork, 16 ); | |
145 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
146 | + tempEigenValues = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
128 | 147 | |
129 | 148 | for(molds_lapack_int i = 0; i < size; i++){ |
130 | 149 | for(molds_lapack_int j = i; j < size; j++){ |
@@ -133,7 +152,11 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
133 | 152 | } |
134 | 153 | |
135 | 154 | // call Lapack |
155 | +#ifdef __INTEL_COMPILER | |
136 | 156 | dsyevd(&job, &uplo, &size, convertedMatrix, &lda, tempEigenValues, work, &lwork, iwork, &liwork, &info); |
157 | +#else | |
158 | + info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
159 | +#endif | |
137 | 160 | |
138 | 161 | for(molds_lapack_int i = 0; i < size; i++){ |
139 | 162 | for(molds_lapack_int j = 0; j < size; j++){ |
@@ -160,10 +183,10 @@ molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapa | ||
160 | 183 | //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); |
161 | 184 | |
162 | 185 | // free |
163 | - mkl_free(work); | |
164 | - mkl_free(iwork); | |
165 | - mkl_free(convertedMatrix); | |
166 | - mkl_free(tempEigenValues); | |
186 | + MOLDS_LAPACK_free(work); | |
187 | + MOLDS_LAPACK_free(iwork); | |
188 | + MOLDS_LAPACK_free(convertedMatrix); | |
189 | + MOLDS_LAPACK_free(tempEigenValues); | |
167 | 190 | |
168 | 191 | if(info != 0){ |
169 | 192 | stringstream ss; |
@@ -199,9 +222,9 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
199 | 222 | } |
200 | 223 | |
201 | 224 | // malloc |
202 | - ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
203 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
204 | - tempB = (double*)mkl_malloc( sizeof(double)*size, 16 ); | |
225 | + ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
226 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
227 | + tempB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size, 16 ); | |
205 | 228 | |
206 | 229 | for(molds_lapack_int i = 0; i < size; i++){ |
207 | 230 | for(molds_lapack_int j = i; j < size; j++){ |
@@ -218,24 +241,32 @@ molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lap | ||
218 | 241 | { |
219 | 242 | lwork = -1; |
220 | 243 | double tempWork[3]={0.0, 0.0, 0.0}; |
221 | - dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
244 | +#ifdef __INTEL_COMPILER | |
245 | + dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, tempWork, &lwork, &info); | |
246 | +#else | |
247 | + info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
248 | +#endif | |
222 | 249 | blockSize = tempWork[0]/size; |
223 | 250 | } |
224 | 251 | info = 0; |
225 | 252 | lwork = blockSize*size; |
226 | - work = (double*)mkl_malloc( sizeof(double)*lwork, 16 ); | |
253 | + work = (double*)MOLDS_LAPACK_malloc( sizeof(double)*lwork, 16 ); | |
227 | 254 | |
228 | 255 | // call Lapack |
256 | +#ifdef __INTEL_COMPILER | |
229 | 257 | dsysv(&uplo, &size, &nrhs, convertedMatrix, &lda, ipiv, tempB, &ldb, work, &lwork, &info); |
258 | +#else | |
259 | + info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
260 | +#endif | |
230 | 261 | for(molds_lapack_int i = 0; i < size; i++){ |
231 | 262 | b[i] = tempB[i]; |
232 | 263 | } |
233 | 264 | |
234 | 265 | // free |
235 | - mkl_free(convertedMatrix); | |
236 | - mkl_free(ipiv); | |
237 | - mkl_free(work); | |
238 | - mkl_free(tempB); | |
266 | + MOLDS_LAPACK_free(convertedMatrix); | |
267 | + MOLDS_LAPACK_free(ipiv); | |
268 | + MOLDS_LAPACK_free(work); | |
269 | + MOLDS_LAPACK_free(tempB); | |
239 | 270 | |
240 | 271 | if(info != 0){ |
241 | 272 | stringstream ss; |
@@ -271,9 +302,9 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
271 | 302 | |
272 | 303 | try{ |
273 | 304 | // malloc |
274 | - ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
275 | - convertedMatrix = (double*)mkl_malloc( sizeof(double)*size*size, 16 ); | |
276 | - convertedB = (double*)mkl_malloc( sizeof(double)*nrhs*size, 16 ); | |
305 | + ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*size, 16 ); | |
306 | + convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*size*size, 16 ); | |
307 | + convertedB = (double*)MOLDS_LAPACK_malloc( sizeof(double)*nrhs*size, 16 ); | |
277 | 308 | for(molds_lapack_int i = 0; i < size; i++){ |
278 | 309 | for(molds_lapack_int j = 0; j < size; j++){ |
279 | 310 | convertedMatrix[i+j*size] = matrix[i][j]; |
@@ -285,7 +316,11 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
285 | 316 | } |
286 | 317 | } |
287 | 318 | this->Dgetrf(convertedMatrix, ipiv, size, size); |
319 | +#ifdef __INTEL_COMPILER | |
288 | 320 | dgetrs(&trans, &size, &nrhs, convertedMatrix, &lda, ipiv, convertedB, &ldb, &info); |
321 | +#else | |
322 | + info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
323 | +#endif | |
289 | 324 | for(molds_lapack_int i = 0; i < nrhs; i++){ |
290 | 325 | for(molds_lapack_int j = 0; j < size; j++){ |
291 | 326 | b[i][j] = convertedB[j+i*size]; |
@@ -294,15 +329,15 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
294 | 329 | } |
295 | 330 | catch(MolDSException ex){ |
296 | 331 | // free |
297 | - mkl_free(convertedMatrix); | |
298 | - mkl_free(convertedB); | |
299 | - mkl_free(ipiv); | |
332 | + MOLDS_LAPACK_free(convertedMatrix); | |
333 | + MOLDS_LAPACK_free(convertedB); | |
334 | + MOLDS_LAPACK_free(ipiv); | |
300 | 335 | throw ex; |
301 | 336 | } |
302 | 337 | // free |
303 | - mkl_free(convertedMatrix); | |
304 | - mkl_free(convertedB); | |
305 | - mkl_free(ipiv); | |
338 | + MOLDS_LAPACK_free(convertedMatrix); | |
339 | + MOLDS_LAPACK_free(convertedB); | |
340 | + MOLDS_LAPACK_free(ipiv); | |
306 | 341 | |
307 | 342 | if(info != 0){ |
308 | 343 | stringstream ss; |
@@ -316,9 +351,9 @@ molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_l | ||
316 | 351 | // Argument "matrix" is sizeM*sizeN matrix. |
317 | 352 | // Argument "matrix" will be LU-decomposed. |
318 | 353 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
319 | - molds_lapack_int* ipiv = (molds_lapack_int*)mkl_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); | |
354 | + molds_lapack_int* ipiv = (molds_lapack_int*)MOLDS_LAPACK_malloc( sizeof(molds_lapack_int)*2*sizeM, 16 ); | |
320 | 355 | this->Dgetrf(matrix, ipiv, sizeM, sizeN); |
321 | - mkl_free(ipiv); | |
356 | + MOLDS_LAPACK_free(ipiv); | |
322 | 357 | molds_lapack_int info = 0; |
323 | 358 | return info; |
324 | 359 | } |
@@ -326,7 +361,7 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_l | ||
326 | 361 | // Argument "matrix" is sizeM*sizeN matrix. |
327 | 362 | // Argument "matrix" will be LU-decomposed. |
328 | 363 | molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
329 | - double* convertedMatrix = (double*)mkl_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
364 | + double* convertedMatrix = (double*)MOLDS_LAPACK_malloc( sizeof(double)*sizeM*sizeN, 16 ); | |
330 | 365 | for(molds_lapack_int i=0; i<sizeM; i++){ |
331 | 366 | for(molds_lapack_int j=0; j<sizeN; j++){ |
332 | 367 | convertedMatrix[i+j*sizeM] = matrix[i][j]; |
@@ -338,7 +373,7 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_l | ||
338 | 373 | matrix[i][j] = convertedMatrix[i+j*sizeM]; |
339 | 374 | } |
340 | 375 | } |
341 | - mkl_free(convertedMatrix); | |
376 | + MOLDS_LAPACK_free(convertedMatrix); | |
342 | 377 | molds_lapack_int info = 0; |
343 | 378 | return info; |
344 | 379 | } |
@@ -348,7 +383,11 @@ molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_l | ||
348 | 383 | molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ |
349 | 384 | molds_lapack_int info = 0; |
350 | 385 | molds_lapack_int lda = sizeM; |
386 | +#ifdef __INTEL_COMPILER | |
351 | 387 | dgetrf(&sizeM, &sizeN, matrix, &lda, ipiv, &info); |
388 | +#else | |
389 | + info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
390 | +#endif | |
352 | 391 | if(info != 0){ |
353 | 392 | stringstream ss; |
354 | 393 | ss << errorMessageDgetrfInfo; |
@@ -1,365 +0,0 @@ | ||
1 | -//************************************************************************// | |
2 | -// Copyright (C) 2011-2012 Mikiya Fujii // | |
3 | -// // | |
4 | -// This file is part of MolDS. // | |
5 | -// // | |
6 | -// MolDS is free software: you can redistribute it and/or modify // | |
7 | -// it under the terms of the GNU General Public License as published by // | |
8 | -// the Free Software Foundation, either version 3 of the License, or // | |
9 | -// (at your option) any later version. // | |
10 | -// // | |
11 | -// MolDS is distributed in the hope that it will be useful, // | |
12 | -// but WITHOUT ANY WARRANTY; without even the implied warranty of // | |
13 | -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // | |
14 | -// GNU General Public License for more details. // | |
15 | -// // | |
16 | -// You should have received a copy of the GNU General Public License // | |
17 | -// along with MolDS. If not, see <http://www.gnu.org/licenses/>. // | |
18 | -//************************************************************************// | |
19 | -#include<stdio.h> | |
20 | -#include<stdlib.h> | |
21 | -#include<limits.h> | |
22 | -#include<iostream> | |
23 | -#include<sstream> | |
24 | -#include<math.h> | |
25 | -#include<string> | |
26 | -#include<stdexcept> | |
27 | -#include<boost/format.hpp> | |
28 | -#if ( __WORDSIZE == 32 ) | |
29 | -#else | |
30 | -#define HAVE_LAPACK_CONFIG_H | |
31 | -#define LAPACK_ILP64 | |
32 | -#endif | |
33 | -#include"lapacke.h" | |
34 | -#include"../base/PrintController.h" | |
35 | -#include"../base/MolDSException.h" | |
36 | -#include"../base/Uncopyable.h" | |
37 | -#include"Lapack.h" | |
38 | -using namespace std; | |
39 | -using namespace MolDS_base; | |
40 | - | |
41 | -namespace MolDS_wrappers{ | |
42 | -Lapack* Lapack::lapack = NULL; | |
43 | - | |
44 | -Lapack::Lapack(){ | |
45 | - this->errorMessageDsyevdInfo = "Error in wrappers::Lapack::Dsyevd: info != 0: info = "; | |
46 | - this->errorMessageDsyevdSize = "Error in wrappers::Lapack::Dsyevd: size of matirx < 1\n"; | |
47 | - this->errorMessageDsysvInfo = "Error in wrappers::Lapack::Dsysv: info != 0: info = "; | |
48 | - this->errorMessageDsysvSize = "Error in wrappers::Lapack::Dsysv: size of matirx < 1\n"; | |
49 | - this->errorMessageDgetrsInfo = "Error in wrappers::Lapack::Dgetrs: info != 0: info = "; | |
50 | - this->errorMessageDgetrsSize = "Error in wrappers::Lapack::Dgetrs: size of matirx < 1\n"; | |
51 | - this->errorMessageDgetrfInfo = "Error in wrappers::Lapack::Dgetrf: info != 0: info = "; | |
52 | -} | |
53 | - | |
54 | -Lapack::~Lapack(){ | |
55 | -} | |
56 | - | |
57 | -Lapack* Lapack::GetInstance(){ | |
58 | - if(lapack == NULL){ | |
59 | - lapack = new Lapack(); | |
60 | - //this->OutputLog("Lapack created.\n\n"); | |
61 | - } | |
62 | - return lapack; | |
63 | -} | |
64 | - | |
65 | -void Lapack::DeleteInstance(){ | |
66 | - if(lapack != NULL){ | |
67 | - delete lapack; | |
68 | - //this->OutputLog("Lapack deleted\n\n"); | |
69 | - } | |
70 | - lapack = NULL; | |
71 | -} | |
72 | - | |
73 | - | |
74 | -/*** | |
75 | - * | |
76 | - * return notice | |
77 | - * i-th eigen value is eigenValues[i]. | |
78 | - * i-th eigen vector is (matirx[i][0], matirx[i][1], matirx[i][2], ....). | |
79 | - * | |
80 | - * ***/ | |
81 | -molds_lapack_int Lapack::Dsyevd(double** matrix, double* eigenValues, molds_lapack_int size, bool calcEigenVectors){ | |
82 | - molds_lapack_int info = 0; | |
83 | - molds_lapack_int k = 0; | |
84 | - molds_lapack_int lwork; | |
85 | - molds_lapack_int liwork; | |
86 | - char job; | |
87 | - char uplo = 'U'; | |
88 | - molds_lapack_int lda = size; | |
89 | - double* convertedMatrix; | |
90 | - double* tempEigenValues; | |
91 | - double* work; | |
92 | - molds_lapack_int* iwork; | |
93 | - | |
94 | - // set job type | |
95 | - if(calcEigenVectors){ | |
96 | - job = 'V'; | |
97 | - } | |
98 | - else{ | |
99 | - job = 'N'; | |
100 | - } | |
101 | - | |
102 | - // calc. lwork and liwork | |
103 | - if(size < 1 ){ | |
104 | - stringstream ss; | |
105 | - ss << errorMessageDsyevdSize; | |
106 | - throw MolDSException(ss.str()); | |
107 | - } | |
108 | - else if(size == 1){ | |
109 | - lwork = 1; | |
110 | - liwork = 1; | |
111 | - } | |
112 | - else if(1 < size && job == 'N'){ | |
113 | - lwork = 2*size + 1; | |
114 | - liwork = 2; | |
115 | - } | |
116 | - else{ | |
117 | - // calc. k | |
118 | - double temp = log((double)size)/log(2.0); | |
119 | - if( (double)((molds_lapack_int)temp) < temp ){ | |
120 | - k = (molds_lapack_int)temp + 1; | |
121 | - } | |
122 | - else{ | |
123 | - k = (molds_lapack_int)temp; | |
124 | - } | |
125 | - lwork = 3*size*size + (5+2*k)*size + 1; | |
126 | - liwork = 5*size + 3; | |
127 | - } | |
128 | - | |
129 | - // malloc | |
130 | - work = (double*)LAPACKE_malloc( sizeof(double)*lwork ); | |
131 | - iwork = (molds_lapack_int*)LAPACKE_malloc( sizeof(molds_lapack_int)*liwork ); | |
132 | - convertedMatrix = (double*)LAPACKE_malloc( sizeof(double)*size*size ); | |
133 | - tempEigenValues = (double*)LAPACKE_malloc( sizeof(double)*size ); | |
134 | - | |
135 | - for(molds_lapack_int i = 0; i < size; i++){ | |
136 | - for(molds_lapack_int j = i; j < size; j++){ | |
137 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
138 | - } | |
139 | - } | |
140 | - | |
141 | - // call Lapack | |
142 | - info = LAPACKE_dsyevd_work(LAPACK_COL_MAJOR, job, uplo, size, convertedMatrix, lda, tempEigenValues, work, lwork, iwork, liwork); | |
143 | - | |
144 | - for(molds_lapack_int i = 0; i < size; i++){ | |
145 | - for(molds_lapack_int j = 0; j < size; j++){ | |
146 | - matrix[i][j] = convertedMatrix[j+i*size]; //i-th row is i-th eigen vector | |
147 | - //matrix[j][i] = convertedMatrix[j+i*size]; //i-th column is i-th eigen vector | |
148 | - } | |
149 | - } | |
150 | - | |
151 | - for(molds_lapack_int i=0;i<size;i++){ | |
152 | - double temp = 0.0; | |
153 | - for(molds_lapack_int j=0;j<size;j++){ | |
154 | - temp += matrix[i][j]; | |
155 | - } | |
156 | - if(temp<0){ | |
157 | - for(molds_lapack_int j=0;j<size;j++){ | |
158 | - matrix[i][j]*=-1.0; | |
159 | - } | |
160 | - } | |
161 | - } | |
162 | - | |
163 | - for(molds_lapack_int i = 0; i < size; i++){ | |
164 | - eigenValues[i] = tempEigenValues[i]; | |
165 | - } | |
166 | - //this->OutputLog(boost::format("size=%d lwork=%d liwork=%d k=%d info=%d\n") % size % lwork % liwork % k % info); | |
167 | - | |
168 | - // free | |
169 | - LAPACKE_free(work); | |
170 | - LAPACKE_free(iwork); | |
171 | - LAPACKE_free(convertedMatrix); | |
172 | - LAPACKE_free(tempEigenValues); | |
173 | - | |
174 | - if(info != 0){ | |
175 | - stringstream ss; | |
176 | - ss << errorMessageDsyevdInfo; | |
177 | - ss << info << endl; | |
178 | - throw MolDSException(ss.str()); | |
179 | - } | |
180 | - return info; | |
181 | -} | |
182 | - | |
183 | -/*** | |
184 | - * | |
185 | - * Slove matrix*X=b, then we get X by this method. | |
186 | - * The X is stored in b. | |
187 | - * | |
188 | - */ | |
189 | -molds_lapack_int Lapack::Dsysv(double const* const* matrix, double* b, molds_lapack_int size){ | |
190 | - molds_lapack_int info = 0; | |
191 | - molds_lapack_int lwork; | |
192 | - char uplo = 'U'; | |
193 | - molds_lapack_int lda = size; | |
194 | - molds_lapack_int ldb = size; | |
195 | - molds_lapack_int nrhs = 1; | |
196 | - double* convertedMatrix; | |
197 | - double* work; | |
198 | - double* tempB; | |
199 | - molds_lapack_int* ipiv; | |
200 | - | |
201 | - if(size < 1 ){ | |
202 | - stringstream ss; | |
203 | - ss << errorMessageDsysvSize; | |
204 | - throw MolDSException(ss.str()); | |
205 | - } | |
206 | - | |
207 | - // malloc | |
208 | - ipiv = (molds_lapack_int*)LAPACKE_malloc( sizeof(molds_lapack_int)*2*size ); | |
209 | - convertedMatrix = (double*)LAPACKE_malloc( sizeof(double)*size*size ); | |
210 | - tempB = (double*)LAPACKE_malloc( sizeof(double)*size ); | |
211 | - | |
212 | - for(molds_lapack_int i = 0; i < size; i++){ | |
213 | - for(molds_lapack_int j = i; j < size; j++){ | |
214 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
215 | - } | |
216 | - } | |
217 | - for(molds_lapack_int i = 0; i < size; i++){ | |
218 | - tempB[i] = b[i]; | |
219 | - } | |
220 | - | |
221 | - // calc. lwork | |
222 | - double blockSize=0.0; | |
223 | -#pragma omp critical | |
224 | - { | |
225 | - lwork = -1; | |
226 | - double tempWork[3]={0.0, 0.0, 0.0}; | |
227 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, tempWork, lwork); | |
228 | - blockSize = tempWork[0]/size; | |
229 | - } | |
230 | - info = 0; | |
231 | - lwork = blockSize*size; | |
232 | - work = (double*)LAPACKE_malloc( sizeof(double)*lwork ); | |
233 | - | |
234 | - // call Lapack | |
235 | - info = LAPACKE_dsysv_work(LAPACK_COL_MAJOR, uplo, size, nrhs, convertedMatrix, lda, ipiv, tempB, ldb, work, lwork); | |
236 | - for(molds_lapack_int i = 0; i < size; i++){ | |
237 | - b[i] = tempB[i]; | |
238 | - } | |
239 | - | |
240 | - // free | |
241 | - LAPACKE_free(convertedMatrix); | |
242 | - LAPACKE_free(ipiv); | |
243 | - LAPACKE_free(work); | |
244 | - LAPACKE_free(tempB); | |
245 | - | |
246 | - if(info != 0){ | |
247 | - stringstream ss; | |
248 | - ss << errorMessageDsysvInfo; | |
249 | - ss << info << endl; | |
250 | - throw MolDSException(ss.str()); | |
251 | - } | |
252 | - return info; | |
253 | -} | |
254 | - | |
255 | -/*** | |
256 | - * | |
257 | - * Slove matrix*X[i]=b[i] (i=0, 1, ... , nrhs-1), then we get X[i] by this method. | |
258 | - * The X[i] is stored in b[i]. | |
259 | - * b[i][j] is j-th element of i-th solution, b[i]. | |
260 | - * | |
261 | - */ | |
262 | -molds_lapack_int Lapack::Dgetrs(double const* const* matrix, double** b, molds_lapack_int size, molds_lapack_int nrhs) const{ | |
263 | - molds_lapack_int info = 0; | |
264 | - char trans = 'N'; | |
265 | - molds_lapack_int lda = size; | |
266 | - molds_lapack_int ldb = size; | |
267 | - double* convertedMatrix; | |
268 | - double* convertedB; | |
269 | - molds_lapack_int* ipiv; | |
270 | - | |
271 | - if(size < 1 ){ | |
272 | - stringstream ss; | |
273 | - ss << errorMessageDgetrsSize; | |
274 | - throw MolDSException(ss.str()); | |
275 | - } | |
276 | - | |
277 | - try{ | |
278 | - // malloc | |
279 | - ipiv = (molds_lapack_int*)LAPACKE_malloc( sizeof(molds_lapack_int)*2*size); | |
280 | - convertedMatrix = (double*)LAPACKE_malloc( sizeof(double)*size*size); | |
281 | - convertedB = (double*)LAPACKE_malloc( sizeof(double)*nrhs*size); | |
282 | - for(molds_lapack_int i = 0; i < size; i++){ | |
283 | - for(molds_lapack_int j = 0; j < size; j++){ | |
284 | - convertedMatrix[i+j*size] = matrix[i][j]; | |
285 | - } | |
286 | - } | |
287 | - for(molds_lapack_int i = 0; i < nrhs; i++){ | |
288 | - for(molds_lapack_int j = 0; j < size; j++){ | |
289 | - convertedB[j+i*size] = b[i][j]; | |
290 | - } | |
291 | - } | |
292 | - this->Dgetrf(convertedMatrix, ipiv, size, size); | |
293 | - info = LAPACKE_dgetrs_work(LAPACK_COL_MAJOR, trans, size, nrhs, convertedMatrix, lda, ipiv, convertedB, ldb); | |
294 | - for(molds_lapack_int i = 0; i < nrhs; i++){ | |
295 | - for(molds_lapack_int j = 0; j < size; j++){ | |
296 | - b[i][j] = convertedB[j+i*size]; | |
297 | - } | |
298 | - } | |
299 | - } | |
300 | - catch(MolDSException ex){ | |
301 | - // free | |
302 | - LAPACKE_free(convertedMatrix); | |
303 | - LAPACKE_free(convertedB); | |
304 | - LAPACKE_free(ipiv); | |
305 | - throw ex; | |
306 | - } | |
307 | - // free | |
308 | - LAPACKE_free(convertedMatrix); | |
309 | - LAPACKE_free(convertedB); | |
310 | - LAPACKE_free(ipiv); | |
311 | - | |
312 | - if(info != 0){ | |
313 | - stringstream ss; | |
314 | - ss << errorMessageDgetrsInfo; | |
315 | - ss << info << endl; | |
316 | - throw MolDSException(ss.str()); | |
317 | - } | |
318 | - return info; | |
319 | -} | |
320 | - | |
321 | -// Argument "matrix" is sizeM*sizeN matrix. | |
322 | -// Argument "matrix" will be LU-decomposed. | |
323 | -molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ | |
324 | - molds_lapack_int* ipiv = (molds_lapack_int*)LAPACKE_malloc( sizeof(molds_lapack_int)*2*sizeM ); | |
325 | - this->Dgetrf(matrix, ipiv, sizeM, sizeN); | |
326 | - LAPACKE_free(ipiv); | |
327 | - molds_lapack_int info = 0; | |
328 | - return info; | |
329 | -} | |
330 | - | |
331 | -// Argument "matrix" is sizeM*sizeN matrix. | |
332 | -// Argument "matrix" will be LU-decomposed. | |
333 | -molds_lapack_int Lapack::Dgetrf(double** matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ | |
334 | - double* convertedMatrix = (double*)LAPACKE_malloc( sizeof(double)*sizeM*sizeN ); | |
335 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
336 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
337 | - convertedMatrix[i+j*sizeM] = matrix[i][j]; | |
338 | - } | |
339 | - } | |
340 | - this->Dgetrf(convertedMatrix, ipiv, sizeM, sizeN); | |
341 | - for(molds_lapack_int i=0; i<sizeM; i++){ | |
342 | - for(molds_lapack_int j=0; j<sizeN; j++){ | |
343 | - matrix[i][j] = convertedMatrix[i+j*sizeM]; | |
344 | - } | |
345 | - } | |
346 | - LAPACKE_free(convertedMatrix); | |
347 | - molds_lapack_int info = 0; | |
348 | - return info; | |
349 | -} | |
350 | - | |
351 | -// Argument "matrix" means sizeM * sizeN matrix. | |
352 | -// The each element of "matrix" should be stored in 1-dimensional vecotre with column major (Fortran type). | |
353 | -molds_lapack_int Lapack::Dgetrf(double* matrix, molds_lapack_int* ipiv, molds_lapack_int sizeM, molds_lapack_int sizeN) const{ | |
354 | - molds_lapack_int info = 0; | |
355 | - molds_lapack_int lda = sizeM; | |
356 | - info = LAPACKE_dgetrf_work(LAPACK_COL_MAJOR, sizeM, sizeN, matrix, lda, ipiv); | |
357 | - if(info != 0){ | |
358 | - stringstream ss; | |
359 | - ss << errorMessageDgetrfInfo; | |
360 | - ss << info << endl; | |
361 | - throw MolDSException(ss.str()); | |
362 | - } | |
363 | - return info; | |
364 | -} | |
365 | -} |