namespace mimas {

template< typename T >
T _mul2( const T &x );

template  <typename T >
void _dxvec( T gx, T gy, short int w, signed char &dx, short int &dy )
{
  T
    agx = gx < 0 ? -gx : gx,
    agy = gy < 0 ? -gy : gy;
  if ( agy < agx ) {
    if ( _mul2( agy ) < agx )
      dy = 0;
    else
      dy = gy < 0 ? -w : w;
    dx = gx < 0 ? -1 : 1;
  } else {
    if ( _mul2( agx ) < agy )
      dx = 0;
    else
      dx = gx < 0 ? -1 : 1;
    dy = gy < 0 ? -w : w;
  };
}

template<
  typename T1, typename T2,
  typename TAPtr, typename TBPtr, typename TCPtr
>
image< bool > nonMaximaSuppression
  ( const const_image_ref< T1, TAPtr > &gradientX,
    const const_image_ref< T1, TBPtr > &gradientY,
    const const_image_ref< T2, TCPtr > &gradientSqr,
    T2 threshold )
{
  T2 thresholdSqr = threshold * threshold;

  image< bool > retVal; retVal.init( gradientSqr.getWidth(),
                                     gradientSqr.getHeight() );

  short int w = (short)gradientSqr.getWidth();
  const T2 *pgSqr = gradientSqr.rawData() + w + 1;
  const T1
    *pgX = gradientX.rawData() + w + 1,
    *pgY = gradientY.rawData() + w + 1;
  bool *pr = retVal.rawData() + w + 1;

  for ( int y=1; y<gradientSqr.getHeight()-1; y++ ) {
    for ( int x=1; x<gradientSqr.getWidth()-1; x++ ) {

      signed char dx;
      short int dy;

      _dxvec< T1 >( *pgX, *pgY, w, dx, dy );

      T2 g = *pgSqr;
      if ( g >= thresholdSqr && 
           g >= pgSqr[ - ( dy + dx ) ] &&
           g >= pgSqr[ + ( dy + dx ) ] )
        *pr = true;

      pgSqr++;
      pgX++;
      pgY++;
      pr++;
    };
    pgSqr += 2;
    pgX += 2;
    pgY += 2;
    pr += 2;
  };

  return retVal;
}

};