1 | // This file is part of Eigen, a lightweight C++ template library
|
---|
2 | // for linear algebra.
|
---|
3 | //
|
---|
4 | // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
|
---|
5 | //
|
---|
6 | // This Source Code Form is subject to the terms of the Mozilla
|
---|
7 | // Public License v. 2.0. If a copy of the MPL was not distributed
|
---|
8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
---|
9 |
|
---|
10 | #ifndef EIGEN_PARALLELIZER_H
|
---|
11 | #define EIGEN_PARALLELIZER_H
|
---|
12 |
|
---|
13 | namespace Eigen {
|
---|
14 |
|
---|
15 | namespace internal {
|
---|
16 |
|
---|
17 | /** \internal */
|
---|
18 | inline void manage_multi_threading(Action action, int* v)
|
---|
19 | {
|
---|
20 | static EIGEN_UNUSED int m_maxThreads = -1;
|
---|
21 |
|
---|
22 | if(action==SetAction)
|
---|
23 | {
|
---|
24 | eigen_internal_assert(v!=0);
|
---|
25 | m_maxThreads = *v;
|
---|
26 | }
|
---|
27 | else if(action==GetAction)
|
---|
28 | {
|
---|
29 | eigen_internal_assert(v!=0);
|
---|
30 | #ifdef EIGEN_HAS_OPENMP
|
---|
31 | if(m_maxThreads>0)
|
---|
32 | *v = m_maxThreads;
|
---|
33 | else
|
---|
34 | *v = omp_get_max_threads();
|
---|
35 | #else
|
---|
36 | *v = 1;
|
---|
37 | #endif
|
---|
38 | }
|
---|
39 | else
|
---|
40 | {
|
---|
41 | eigen_internal_assert(false);
|
---|
42 | }
|
---|
43 | }
|
---|
44 |
|
---|
45 | }
|
---|
46 |
|
---|
47 | /** Must be call first when calling Eigen from multiple threads */
|
---|
48 | inline void initParallel()
|
---|
49 | {
|
---|
50 | int nbt;
|
---|
51 | internal::manage_multi_threading(GetAction, &nbt);
|
---|
52 | std::ptrdiff_t l1, l2;
|
---|
53 | internal::manage_caching_sizes(GetAction, &l1, &l2);
|
---|
54 | }
|
---|
55 |
|
---|
56 | /** \returns the max number of threads reserved for Eigen
|
---|
57 | * \sa setNbThreads */
|
---|
58 | inline int nbThreads()
|
---|
59 | {
|
---|
60 | int ret;
|
---|
61 | internal::manage_multi_threading(GetAction, &ret);
|
---|
62 | return ret;
|
---|
63 | }
|
---|
64 |
|
---|
65 | /** Sets the max number of threads reserved for Eigen
|
---|
66 | * \sa nbThreads */
|
---|
67 | inline void setNbThreads(int v)
|
---|
68 | {
|
---|
69 | internal::manage_multi_threading(SetAction, &v);
|
---|
70 | }
|
---|
71 |
|
---|
72 | namespace internal {
|
---|
73 |
|
---|
74 | template<typename Index> struct GemmParallelInfo
|
---|
75 | {
|
---|
76 | GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
|
---|
77 |
|
---|
78 | int volatile sync;
|
---|
79 | int volatile users;
|
---|
80 |
|
---|
81 | Index rhs_start;
|
---|
82 | Index rhs_length;
|
---|
83 | };
|
---|
84 |
|
---|
85 | template<bool Condition, typename Functor, typename Index>
|
---|
86 | void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
|
---|
87 | {
|
---|
88 | // TODO when EIGEN_USE_BLAS is defined,
|
---|
89 | // we should still enable OMP for other scalar types
|
---|
90 | #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
|
---|
91 | // FIXME the transpose variable is only needed to properly split
|
---|
92 | // the matrix product when multithreading is enabled. This is a temporary
|
---|
93 | // fix to support row-major destination matrices. This whole
|
---|
94 | // parallelizer mechanism has to be redisigned anyway.
|
---|
95 | EIGEN_UNUSED_VARIABLE(transpose);
|
---|
96 | func(0,rows, 0,cols);
|
---|
97 | #else
|
---|
98 |
|
---|
99 | // Dynamically check whether we should enable or disable OpenMP.
|
---|
100 | // The conditions are:
|
---|
101 | // - the max number of threads we can create is greater than 1
|
---|
102 | // - we are not already in a parallel code
|
---|
103 | // - the sizes are large enough
|
---|
104 |
|
---|
105 | // 1- are we already in a parallel session?
|
---|
106 | // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
|
---|
107 | if((!Condition) || (omp_get_num_threads()>1))
|
---|
108 | return func(0,rows, 0,cols);
|
---|
109 |
|
---|
110 | Index size = transpose ? cols : rows;
|
---|
111 |
|
---|
112 | // 2- compute the maximal number of threads from the size of the product:
|
---|
113 | // FIXME this has to be fine tuned
|
---|
114 | Index max_threads = std::max<Index>(1,size / 32);
|
---|
115 |
|
---|
116 | // 3 - compute the number of threads we are going to use
|
---|
117 | Index threads = std::min<Index>(nbThreads(), max_threads);
|
---|
118 |
|
---|
119 | if(threads==1)
|
---|
120 | return func(0,rows, 0,cols);
|
---|
121 |
|
---|
122 | Eigen::initParallel();
|
---|
123 | func.initParallelSession();
|
---|
124 |
|
---|
125 | if(transpose)
|
---|
126 | std::swap(rows,cols);
|
---|
127 |
|
---|
128 | GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
|
---|
129 |
|
---|
130 | #pragma omp parallel num_threads(threads)
|
---|
131 | {
|
---|
132 | Index i = omp_get_thread_num();
|
---|
133 | // Note that the actual number of threads might be lower than the number of request ones.
|
---|
134 | Index actual_threads = omp_get_num_threads();
|
---|
135 |
|
---|
136 | Index blockCols = (cols / actual_threads) & ~Index(0x3);
|
---|
137 | Index blockRows = (rows / actual_threads) & ~Index(0x7);
|
---|
138 |
|
---|
139 | Index r0 = i*blockRows;
|
---|
140 | Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
|
---|
141 |
|
---|
142 | Index c0 = i*blockCols;
|
---|
143 | Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
|
---|
144 |
|
---|
145 | info[i].rhs_start = c0;
|
---|
146 | info[i].rhs_length = actualBlockCols;
|
---|
147 |
|
---|
148 | if(transpose)
|
---|
149 | func(0, cols, r0, actualBlockRows, info);
|
---|
150 | else
|
---|
151 | func(r0, actualBlockRows, 0,cols, info);
|
---|
152 | }
|
---|
153 |
|
---|
154 | delete[] info;
|
---|
155 | #endif
|
---|
156 | }
|
---|
157 |
|
---|
158 | } // end namespace internal
|
---|
159 |
|
---|
160 | } // end namespace Eigen
|
---|
161 |
|
---|
162 | #endif // EIGEN_PARALLELIZER_H
|
---|