namespace mimas {

// Convolution of multi-arrays.
// Jan Wedekind (jan@wedesoft.de)
// Fri Jul 15 21:37:32 UTC 2005

template< typename _T >
struct correlate_element
{
  typedef correlate_element< _T > type;
  typedef boost::detail::multi_array::size_type size_type;
  typedef boost::detail::multi_array::index index_type;
  void operator()( _T *result, const _T *a, const _T *b,
                   const size_type *, const size_type *, const size_type *,
                   const index_type *, const index_type *,
                   const index_type * ) {
    *result += *a * *b;
  };
};

template< typename _T, int _N >
struct correlate_array
{
  typedef correlate_array< _T, _N > type;
  typedef boost::detail::multi_array::size_type size_type;
  typedef boost::detail::multi_array::index index_type;
  void operator()( _T *result, const _T *a, const _T *b,
                   const size_type *sizeR, const size_type *sizeA,
                   const size_type *sizeB,
                   const index_type *stepR, const index_type *stepA,
                   const index_type *stepB );
  typename boost::mpl::eval_if< boost::mpl::bool_< _N == 1 >,
                                 correlate_element< _T >,
                                correlate_array< _T, _N - 1 > >::type f;
};

template< typename _T, int _N >
void correlate_array< _T, _N >::operator()
  ( _T *result, const _T *a, const _T *b,
    const size_type *sizeR, const size_type *sizeA, const size_type *sizeB,
    const index_type *stepR, const index_type *stepA, const index_type *stepB )
{
   size_type
     curSizeA = *sizeA,
     curSizeB = *sizeB,
     curSizeR = *sizeR;
   index_type
     curStepA = *stepA,
     curStepB = *stepB,
     curStepR = *stepR;
   int
     fringe1 = ( ( curSizeA + curSizeB - 1 ) - curSizeR ) / 2,
     fringe2 = ( ( curSizeA + curSizeB     ) - curSizeR ) / 2;
   assert( fringe1 >= 0 );
   assert( fringe2 >= 0 );
   for ( int j=0; j<fringe1; j++ )
     for ( int k=fringe1-j; k<(signed)curSizeB; k++ ) {
       int j_plus_k = j - fringe1 + k;
       assert( j_plus_k >= 0 );
       assert( j_plus_k < (signed)curSizeR );
       f( result + j_plus_k * curStepR,
          a + j * curStepA,
          b + k * curStepB,
          sizeR + 1, sizeA + 1, sizeB + 1,
          stepR + 1, stepA + 1, stepB + 1 );
     };
   for ( int j=fringe1; j<(signed)curSizeA - fringe2; j++ )
     for ( int k=0; k<(signed)curSizeB; k++ ) {
       int j_plus_k = j - fringe1 + k;
       assert( j_plus_k >= 0 );
       assert( j_plus_k < (signed)curSizeR );
       f( result + j_plus_k * curStepR,
          a + j * curStepA,
          b + k * curStepB,
          sizeR + 1, sizeA + 1, sizeB + 1,
          stepR + 1, stepA + 1, stepB + 1 );
     };
   for ( int j=(signed)curSizeA - fringe2; j<(signed)curSizeA; j++ )
     for ( int k=0; k<(signed)curSizeB - j + (signed)curSizeA - fringe2 - 1;
           k++ ){
       int j_plus_k = j - fringe1 + k;
       assert( j_plus_k >= 0 );
       assert( j_plus_k < (signed)curSizeR );
       f( result + j_plus_k * curStepR,
          a + j * curStepA,
          b + k * curStepB,
          sizeR + 1, sizeA + 1, sizeB + 1,
          stepR + 1, stepA + 1, stepB + 1 );
     };
};

template< class _Array1, class _Array2 >
boost::multi_array< typename _Array1::element, _Array1::dimensionality >
  correlate( const _Array1 &x, const _Array2 &y )
{
  boost::array< std::size_t, _Array1::dimensionality > shape;
  // for ( int i=0; i<(signed)_Array1::dimensionality; i++ )
  //   shape[ i ] = x.shape()[ i ] + y.shape()[ i ] - 1;
  std::copy( x.shape(), x.shape() + _Array1::dimensionality, shape.begin() ); 
  boost::multi_array< typename _Array1::element, _Array1::dimensionality >
    retVal( shape );

  typedef boost::detail::multi_array::size_type size_type;
  typedef boost::detail::multi_array::index index_type;

  std::multimap< size_type, int > sizes;
  for ( int i=0; i<(signed)_Array1::dimensionality; i++ )
    sizes.insert( std::make_pair( y.shape()[i], i ) );
  
  size_type
    retValShape[ _Array1::dimensionality ],
    xShape[ _Array1::dimensionality ],
    yShapeSorted[ _Array1::dimensionality ];
  index_type
    retValStrides[ _Array1::dimensionality ],
    xStrides[ _Array1::dimensionality ],
    yStrides[ _Array1::dimensionality ];

  // Sort the loops, such that the inner loops for the filter 'y' have
  // the highest number of cycles.
  int j=0;
  for ( std::multimap< size_type, int >::const_iterator i=sizes.begin();
        i != sizes.end(); i++, j++ ) {
    int index = i->second;
    retValShape [ j ] = retVal.shape()[ index ];
    xShape      [ j ] = x     .shape()[ index ];
    yShapeSorted[ j ] = y     .shape()[ index ];
    retValStrides[ j ] = retVal.strides()[ index ];
    xStrides     [ j ] = x     .strides()[ index ];
    yStrides     [ j ] = y     .strides()[ index ];
  };
    
  correlate_array< typename _Array1::element, _Array1::dimensionality > f;
  f( retVal.origin(), x.origin(), y.origin(),
     &retValShape[0], &xShape[0], &yShapeSorted[0],
     &retValStrides[0], &xStrides[0], &yStrides[0] );
  return retVal;
};

#ifdef HAVE_LIBLAPACK
template< typename T >
std::vector< boost::multi_array< T, 2 > > separate
   ( const boost::const_multi_array_ref< T, 2 > &array )
{
  boost::numeric::ublas::matrix< T > A( array.shape()[1], array.shape()[0] );
  std::copy( array.data(), array.data() + array.num_elements(),
             A.data().begin() );
  boost::numeric::ublas::matrix< T, boost::numeric::ublas::column_major > u, v;
  boost::numeric::ublas::banded_matrix< T > d( gesdd< T >( A, &u, &v ) );
  boost::numeric::ublas::vector< T >
    s( boost::numeric::ublas::matrix_column< boost::numeric::ublas::matrix< T, boost::numeric::ublas::column_major > >
       ( u, 0 ) ),
    t( boost::numeric::ublas::matrix_row< boost::numeric::ublas::matrix< T, boost::numeric::ublas::column_major > >
       ( v, 0 ) );
  std::vector< boost::multi_array< T, 2 > > retval;
  boost::multi_array< T, 2 > sa( boost::extents[3][1] );
  std::copy( s.data().begin(), s.data().begin() + s.size(), sa.data() );
  retval.push_back( sa * d( 0, 0 ) );
  boost::multi_array< T, 2 > ta( boost::extents[1][3] );
  std::copy( t.data().begin(), t.data().begin() + t.size(), ta.data() );
  retval.push_back( ta );
  return retval;
}
#endif

};