PHengLEI-NCCR/API/DataStruct/include/PHOps.hxx

765 lines
24 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// +
// Hybrid Platform for Engineering and Research of Flows (HyperFlow) +
// 面向流体工程与研究的混合CFD平台 +
// CZhang Laiping and He Xin +
// State Key Laboratory of Aerodynamics +
// Computational Aerodynamics Institute +
// China Aerodynamics Research and Development Center +
// Since 2007 +
// +
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// 文件头部说明: +
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// FILE: XXX.cpp (XXX.h) +
// AUTHOR(S): He Xin +
// PURPOSE: simulation object +
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// 函数头部说明: +
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Function: 例如Simulation::Simulation() +
// AUTHOR(S): He Xin +
// PURPOSE: 具体说明本模块的功能 +
//++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
namespace PHSPACE
{
//! Operator() functions:
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0) const
{
//assertInRange(i0);
#ifdef USE_SMARTARRAY
return datap1[i0];
#else
return data_[i0 * stride_[0]];
#endif
}
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0)
{
//assertInRange(i0);
#ifdef USE_SMARTARRAY
return datap1[i0];
#else
return data_[i0 * stride_[0]];
#endif
}
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0, int i1) const
{
//assertInRange(i0, i1);
#ifdef USE_SMARTARRAY
return datap2[i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]];
#endif
}
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0, int i1)
{
//assertInRange(i0, i1);
#ifdef USE_SMARTARRAY
return datap2[i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]];
#endif
}
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0, int i1, int i2) const
{
//assertInRange(i0, i1, i2);
#ifdef USE_SMARTARRAY
return datap3[i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2]];
#endif
}
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0, int i1, int i2)
{
//assertInRange(i0, i1, i2);
#ifdef USE_SMARTARRAY
return datap3[i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2]];
#endif
}
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3) const
{
//assertInRange(i0, i1, i2);
#ifdef USE_SMARTARRAY
return datap4[i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3]];
#endif
}
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3)
{
//assertInRange(i0, i1, i2);
#ifdef USE_SMARTARRAY
return datap4[i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3]];
#endif
}
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3, int i4) const
{
#ifdef USE_SMARTARRAY
return datap5[i4][i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3] + i4 * stride_[4]];
#endif
}
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3, int i4)
{
#ifdef USE_SMARTARRAY
return datap5[i4][i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3] + i4 * stride_[4]];
#endif
}
//!ugks
template<typename T, int N>
const T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3, int i4, int i5) const
{
#ifdef USE_SMARTARRAY
return datap6[i5][i4][i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3] + i4 * stride_[4] + i5 * stride_[5]];
#endif
}
//!ugks
template<typename T, int N>
T & PHArray<T, N>::operator()(int i0, int i1, int i2, int i3, int i4, int i5)
{
#ifdef USE_SMARTARRAY
return datap6[i5][i4][i3][i2][i1][i0];
#else
return data_[i0 * stride_[0] + i1 * stride_[1]
+ i2 * stride_[2] + i3 * stride_[3] + i4 * stride_[4]+i5 * stride_[5]];
#endif
}
template<typename T, int N>
const T & PHArray<T, N>::operator[](int i) const
{
return data_[i];
}
template<typename T, int N>
T & PHArray<T, N>::operator[](int i)
{
return data_[i];
}
template<typename T, int N>
template<int N_rank2>
const T & PHArray<T, N>::operator()(const SimpleArray<int, N_rank2> &index) const
{
return data_[dot(index, stride_)];
}
template<typename T, int N>
template<int N_rank2>
T & PHArray<T, N>::operator()(const SimpleArray<int, N_rank2> &index)
{
return data_[dot(index, stride_)];
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0) const
{
return PHArray<T, N>(noConst(), r0);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1) const
{
return PHArray<T, N>(noConst(), r0, r1);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1, Range r2) const
{
return T_array(noConst(), r0, r1, r2);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1, Range r2, Range r3) const
{
return PHArray<T, N>(noConst(), r0, r1, r2, r3);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1, Range r2, Range r3, Range r4) const
{
return PHArray<T, N>(noConst(), r0, r1, r2, r3, r4);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1, Range r2, Range r3, Range r4, Range r5) const
{
return PHArray<T, N>(noConst(), r0, r1, r2, r3, r4, r5);
}
template<typename T, int N>
PHArray<T, N> PHArray<T, N>::operator()(Range r0, Range r1, Range r2, Range r3, Range r4, Range r5, Range r6) const
{
return PHArray<T, N>(noConst(), r0, r1, r2, r3, r4, r5, r6);
}
template<typename T, int N>
template<typename T1, typename T2>
typename PHSliceInfo<T, T1, T2>::T_slice
PHArray<T, N>::operator()(T1 r1, T2 r2) const
{
typedef typename PHSliceInfo<T, T1, T2>::T_slice slice;
return slice(noConst(), r1, r2, PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection(), PHnilArraySection());
}
template<typename T, int N>
template<typename T1, typename T2, typename T3>
typename PHSliceInfo<T, T1, T2, T3>::T_slice
PHArray<T, N>::operator()(T1 r1, T2 r2, T3 r3) const
{
typedef typename PHSliceInfo<T, T1, T2, T3>::T_slice slice;
return slice(noConst(), r1, r2, r3, PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection());
}
template<typename T, int N>
template<typename T1, typename T2, typename T3, typename T4>
typename PHSliceInfo<T, T1, T2, T3, T4>::T_slice
PHArray<T, N>::operator()(T1 r1, T2 r2, T3 r3, T4 r4) const
{
typedef typename PHSliceInfo<T, T1, T2, T3, T4>::T_slice slice;
return slice(noConst(), r1, r2, r3, r4, PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection());
}
template<typename T, int N>
template<typename T1, typename T2, typename T3, typename T4, typename T5>
typename PHSliceInfo<T, T1, T2, T3, T4, T5>::T_slice
PHArray<T, N>::operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5) const
{
typedef typename PHSliceInfo<T, T1, T2, T3, T4, T5>::T_slice slice;
return slice(noConst(), r1, r2, r3, r4, r5, PHnilArraySection(),
PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection());
}
template<typename T, int N>
template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
typename PHSliceInfo<T, T1, T2, T3, T4, T5, T6>::T_slice
PHArray<T, N>::operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6) const
{
typedef typename PHSliceInfo<T, T1, T2, T3, T4, T5, T6>::T_slice slice;
return slice(noConst(), r1, r2, r3, r4, r5, r6, PHnilArraySection(), PHnilArraySection(), PHnilArraySection(),
PHnilArraySection(), PHnilArraySection());
}
#define PH_ARRAY_UPDATE(op, name) \
template < typename P_numtype, int N_rank > \
template < typename T > \
inline PHArray<P_numtype, N_rank>& \
PHArray<P_numtype, N_rank>::operator op(const T &expr) \
{ \
evaluate(typename PHExprWrap<T>::T_expr(expr), \
name<T_numtype, typename PHExprWrap<T>::T_expr::T_numtype>()); \
return *this; \
}
PH_ARRAY_UPDATE(+=, fy_plus_update)
PH_ARRAY_UPDATE(-=, fy_minus_update)
PH_ARRAY_UPDATE(*=, fy_multiply_update)
PH_ARRAY_UPDATE(/=, fy_divide_update)
template < typename T_numtype, int N_rank > template < typename T_expr, typename T_update >
PHArray<T_numtype, N_rank>&
PHArray<T_numtype, N_rank>::evaluateWithIndexTraversal1(T_expr expr, T_update)
{
SimpleArray<int, N_rank> index;
if (stride(firstRank) == 1)
{
int first = lbound(firstRank);
int last = ubound(firstRank);
T_numtype *iter = data_ + first;
for (index[0] = first; index[0] <= last; ++ index[0])
{
T_update::update(*iter ++, expr(index));
int kk = 1;
int i = *iter;
//T_update::update(*iter++,3);
//PHUpdate<int,int>::update(kk,2);
int kkk = 1;
}
}
else
{
PHFastArrayIterator<T_numtype, N_rank> iter(*this);
iter.loadStride(0);
int first = lbound(firstRank);
int last = ubound(firstRank);
for (index[0] = first; index[0] <= last; ++ index[0])
{
T_update::update(*const_cast<T_numtype*>(iter.data()), expr(index));
iter.advance();
}
}
return *this;
}
template < typename T_numtype, int N_rank > template < typename T_expr, typename T_update >
PHArray<T_numtype, N_rank>&
PHArray<T_numtype, N_rank>::evaluateWithIndexTraversalN(T_expr expr, T_update)
{
// Do a stack-type traversal for the destination array and use
// index traversal for the source expression
const int maxRank = ordering(0);
PHFastArrayIterator<T_numtype, N_rank> iter(*this);
for (int i = 1; i < N_rank; ++ i)
{
iter.push(ordering(i));
}
iter.loadStride(maxRank);
SimpleArray<int, N_rank> index, last;
index = storage_.base();
for (int i = 0; i < N_rank; ++ i)
{
last[i] = storage_.base(i) + length_[i];
}
// int lastLength = length(maxRank);
while (true)
{
int a = base(maxRank);
int b = last[maxRank];
for (index[maxRank] = base(maxRank); index[maxRank] < last[maxRank]; ++ index[maxRank])
{
T_update::update(*const_cast<T_numtype*>(iter.data()), expr(index));
iter.advance();
}
int j = 1;
for (; j < N_rank; ++ j)
{
iter.pop(ordering(j));
iter.loadStride(ordering(j));
iter.advance();
index[ordering(j-1)] = base(ordering(j-1));
++ index[ordering(j)];
if (index[ordering(j)] != last[ordering(j)])
{
break;
}
}
if (j == N_rank)
{
break;
}
for (; j > 0; -- j)
{
iter.push(ordering(j));
}
iter.loadStride(maxRank);
}
return *this;
}
template < typename T_numtype, int N_rank > template < typename T_expr, typename T_update >
PHArray<T_numtype, N_rank>&
PHArray<T_numtype, N_rank>::evaluateWithStackTraversal1(T_expr expr, T_update)
{
PHFastArrayIterator<T_numtype, N_rank> iter(*this);
iter.loadStride(firstRank);
expr.loadStride(firstRank);
const T_numtype *last = iter.data() + length(firstRank) * stride(firstRank);
while (iter.data() != last)
{
T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
iter.advance();
expr.advance();
}
return *this;
}
template < typename T_numtype, int N_rank > template < typename T_expr, typename T_update >
PHArray<T_numtype, N_rank>&
PHArray<T_numtype, N_rank>::evaluateWithStackTraversalN(T_expr expr, T_update)
{
/*
* A stack traversal replaces the usual nested loops:
*
* for (int i=A.lbound(firstDim); i <= A.ubound(firstDim); ++i)
* for (int j=A.lbound(secondDim); j <= A.ubound(secondDim); ++j)
* for (int k=A.lbound(thirdDim); k <= A.ubound(thirdDim); ++k)
* A(i,j,k) = 0;
*
* with a stack data structure. The stack allows this single
* routine to replace any number of nested loops.
*
* For each dimension (loop), these quantities are needed:
* - a pointer to the first element encountered in the loop
* - the stride associated with the dimension/loop
* - a pointer to the last element encountered in the loop
*
* The basic idea is that entering each loop is a "push" onto the
* stack, and exiting each loop is a "pop". In practice, this
* routine treats accesses the stack in a random-access way,
* which confuses the picture a bit. But conceptually, that's
* what is going on.
*/
/*
* ordering(0) gives the dimension associated with the smallest
* stride (usually; the exceptions have to do with subarrays and
* are uninteresting). We call this dimension maxRank; it will
* become the innermost "loop".
*
* Ordering the loops from ordering(N_rank-1) down to
* ordering(0) ensures that the largest stride is associated
* with the outermost loop, and the smallest stride with the
* innermost. This is critical for good performance on
* cached machines.
*/
const int maxRank = ordering(0);
// const int secondLastRank = ordering(1);
// Create an iterator for the array receiving the result
PHFastArrayIterator<T_numtype, N_rank> iter(*this);
// Set the initial stack configuration by pushing the pointer
// to the first element of the array onto the stack N times.
int i;
for (i = 1; i < N_rank; ++ i)
{
iter.push(i);
expr.push(i);
}
// Load the strides associated with the innermost loop.
iter.loadStride(maxRank);
expr.loadStride(maxRank);
/*
* Is the stride in the innermost loop equal to 1? If so,
* we might take advantage of this and generate more
* efficient code.
*/
bool useUnitStride = iter.isUnitStride(maxRank) && expr.isUnitStride(maxRank);
/*
* Do all array operands share a common stride in the innermost
* loop? If so, we can generate more efficient code (but only
* if this optimization has been enabled).
*/
#ifdef BZ_ARRAY_EXPR_USE_COMMON_STRIDE
int commonStride = expr.suggestStride(maxRank);
if (iter.suggestStride(maxRank) > commonStride)
{
commonStride = iter.suggestStride(maxRank);
}
bool useCommonStride = iter.isStride(maxRank, commonStride) && expr.isStride(maxRank, commonStride);
#else
int commonStride = 1;
bool useCommonStride = false;
#endif
/*
* The "last" array contains a pointer to the last element
* encountered in each "loop".
*/
const T_numtype *last[N_rank];
// Set up the initial state of the "last" array
for (i = 1; i < N_rank; ++ i)
{
last[i] = iter.data() + length(ordering(i)) * stride(ordering(i));
}
int lastLength = length(maxRank);
int firstNoncollapsedLoop = 1;
/*
* This bit of code handles collapsing loops. When possible,
* the N nested loops are converted into a single loop (basically,
* the N-dimensional array is treated as a long vector).
* This is important for cases where the length of the innermost
* loop is very small, for example a 100x100x3 array.
* If this code can't collapse all the loops into a single loop,
* it will collapse as many loops as possible starting from the
* innermost and working out.
*/
// Collapse loops when possible
for (i = 1; i < N_rank; ++ i)
{
// Figure out which pair of loops we are considering combining.
int outerLoopRank = ordering(i);
int innerLoopRank = ordering(i-1);
/*
* The canCollapse() routines look at the strides and extents
* of the loops, and determine if they can be combined into
* one loop.
*/
if (canCollapse(outerLoopRank, innerLoopRank) && expr.canCollapse(outerLoopRank, innerLoopRank))
{
lastLength *= length(outerLoopRank);
firstNoncollapsedLoop = i+1;
}
else
{
break;
}
}
/*
* Now we actually perform the loops. This while loop contains
* two parts: first, the innermost loop is performed. Then we
* exit the loop, and pop our way down the stack until we find
* a loop that isn't completed. We then restart the inner loops
* and push them onto the stack.
*/
while (true)
{
/*
* This bit of code handles the innermost loop. It would look
* a lot simpler if it weren't for unit stride and common stride
* optimizations; these clutter up the code with multiple versions.
*/
if ((useUnitStride) || (useCommonStride))
{
/*
* The check for BZ_USE_FAST_READ_ARRAY_EXPR can probably
* be taken out. This was put in place while the unit stride/
* common stride optimizations were being implemented and
* tested.
*/
// Compute the end of the innermost loop
int ubound = lastLength * commonStride;
/*
* This is a real kludge. I didn't want to have to write
* a const and non-const version of FastArrayIterator, so I use a
* const iterator and cast away const. This could
* probably be avoided with some trick, but the whole routine
* is ugly, so why bother.
*/
T_numtype *data = const_cast<T_numtype*>(iter.data());
/*
* BZ_NEEDS_WORK-- need to implement optional unrolling.
*/
if (commonStride == 1)
{
for (i = 0; i < ubound; ++ i)
{
T_update::update(*data++, expr.fastRead(i));
}
}
else
{
for (i = 0; i != ubound; i += commonStride)
{
T_update::update(data[i], expr.fastRead(i));
}
}
/*
* Tidy up for the fact that we haven't actually been
* incrementing the iterators in the innermost loop, by
* faking it afterward.
*/
iter.advance(lastLength *commonStride);
expr.advance(lastLength *commonStride);
}
else
{
/*
* We don't have a unit stride or common stride in the innermost
* loop. This is going to hurt performance. Luckily 95% of
* the time, we hit the cases above.
*/
T_numtype *end = const_cast<T_numtype*>(iter.data()) + lastLength * stride(maxRank);
while (iter.data() != end)
{
T_update::update(*const_cast<T_numtype*>(iter.data()), *expr);
iter.advance();
expr.advance();
}
}
/*
* We just finished the innermost loop. Now we pop our way down
* the stack, until we hit a loop that hasn't completed yet.
*/
int j = firstNoncollapsedLoop;
for (; j < N_rank; ++ j)
{
// Get the next loop
int r = ordering(j);
// Pop-- this restores the data pointers to the first element
// encountered in the loop.
iter.pop(j);
expr.pop(j);
// Load the stride associated with this loop, and increment
// once.
iter.loadStride(r);
expr.loadStride(r);
iter.advance();
expr.advance();
// If we aren't at the end of this loop, then stop popping.
if (iter.data() != last[j])
{
break;
}
}
// Are we completely done?
if (j == N_rank)
{
break;
}
// No, so push all the inner loops back onto the stack.
for (; j >= firstNoncollapsedLoop; --j)
{
int r2 = ordering(j-1);
iter.push(j);
expr.push(j);
last[j-1] = iter.data() + length(r2) * stride(r2);
}
// Load the stride for the innermost loop again.
iter.loadStride(maxRank);
expr.loadStride(maxRank);
}
return *this;
}
template < typename T_numtype, int N_rank > template < typename T_expr, typename T_update >
PHArray<T_numtype, N_rank>&
PHArray<T_numtype, N_rank>::evaluate(T_expr expr, T_update)
{
// Check that all arrays have the same shape
/*
* Check that the arrays are not empty (e.g. length 0 arrays)
* This fixes a bug found by Peter Bienstman, 6/16/99, where
* Array<RDouble,2> A(0,0),B(0,0); B=A(tensor::j,tensor::i);
* went into an infinite loop.
*/
int N_rankTmp = N_rank;
if (numElements() == 0)
{
return *this;
}
if (N_rankTmp == 1)
{
return evaluateWithStackTraversal1(expr, T_update());
}
else
{
return evaluateWithStackTraversalN(expr, T_update());
}
}
template < typename T, int N >
PHArray<T, N> & PHArray<T, N>::operator=(T x)
{
return initialize(x);
}
template < typename T, int N > template < typename T_expr >
PHArray<T, N>&
PHArray<T, N>::operator = (const PHETBase<T_expr> &expr)
{
evaluate(expr.unwrap(), PHUpdate<T_numtype, typename T_expr::T_numtype>());
return *this;
}
template < typename T, int N >
PHArray<T, N>&
PHArray<T, N>::operator = (const PHArray<T, N> &x)
{
(*this) = PHArrayExpr< PHFastArrayIterator<T, N> >(x.beginFast());
return *this;
}
template < typename T, int N >
PHArray<T, N>&
PHArray<T, N>::initialize(T x)
{
(*this) = PHArrayExpr< PHArrayExprConstant<T > >(x);
return *this;
}
}