// -*- c++ -*-
//
// $COPYRIGHT$
//
//===========================================================================

#ifndef _MTL_MTL_H_
#define _MTL_MTL_H_

#include <cmath>
#include <functional>
#include <iostream>
#include "mtl/mtl_complex.h"

/* namespace pollution from <sys/sysmacros.h> */
#undef major
#undef minor

#include "mtl/mtl_algo.h"
#include "mtl/dense1D.h"
#include "mtl/mtl_exception.h"
#include "mtl/matrix_traits.h"
#include "mtl/transform_iterator.h"
#include "mtl/scaled1D.h"
#include "mtl/abs.h"

#include "mtl/fast.h"

#if USE_BLAIS
#include "mtl/blais.h"
#endif

#include "mtl/matrix.h"

namespace mtl {

#include "mtl/dim_calc.h"

template <class Vector> inline
typename linalg_traits<Vector>::value_type
__sum(const Vector& x, fast::count<0>)
{
  typedef typename linalg_traits<Vector>::value_type vt;
  return mtl_algo::accumulate(x.begin(), x.end(), vt());
}

#if USE_BLAIS
template <class Vector, int N> inline
typename linalg_traits<Vector>::value_type
__sum(const Vector& x, fast::count<N>)
{
  typedef typename linalg_traits<Vector>::value_type vt;
  return fast::accumulate(x.begin(), fast::count<N>(), vt());
}
#endif

//: Sum:  <tt>s <- sum_i(x(i))</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vec_sum.cc
//!complexity: linear
//!typereqs: The addition operator must be defined for <TT>Vector::value_type</TT>.
// The sum of all of the elements in the container.
template <class Vector> inline
typename linalg_traits<Vector>::value_type
sum(const Vector& x)
{
  return __sum(x, dim_n<Vector>::RET());
}

#include "mtl/mtl_set.h"

template <class Vector, class T> inline
void
oned_scale(Vector x, const T& alpha, fast::count<0>)
{
  mtl_algo::transform(x.begin(), x.end(), x.begin(),
		      std::bind1st(std::multiplies<T>(), alpha));
}
#if USE_BLAIS
template <class Vector, class T, int N> inline
void
oned_scale(Vector x, const T& alpha, fast::count<N>)
{
  fast::transform(x.begin(), fast::count<N>(), x.begin(),
		  std::bind1st(std::multiplies<T>(), alpha));
}
#endif

template <class Vector, class T> inline
void
scale(Vector x, const T& alpha, oned_tag)
{
  oned_scale(x, alpha, dim_n<Vector>::RET());
}

template <class Matrix, class T>
inline void
scale(Matrix& A, const T& alpha, twod_tag)
{
  typename Matrix::iterator i;
  typename Matrix::OneD::iterator j, jend;
  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      *j *= alpha;
  }
}


//: Scale:  <tt>A <- alpha*A or x <- alpha x</tt>
//
// Multiply all the elements in <tt>A</tt> (or <tt>x</tt>) by
// <tt>alpha</tt>.
// 
//!category: algorithms
//!component: function
//!example: vec_scale_algo.cc
//!complexity: O(n)
//!definition: mtl.h
//!typereqs: <TT>Vector</TT> must be mutable
//!typereqs: <TT>T</TT> is convertible to <TT>Vector</TT>'s <TT>value_type</TT>
//!typereqs: The multiplication operator must be defined for <TT>Vector::value_type</TT> and <tt>T</tt>
template <class LinalgObj, class T>
inline void
scale(LinalgObj A, const T& alpha)
{
  typedef typename linalg_traits<LinalgObj>::dimension Dim;
  scale(A, alpha, Dim());  
}



//: Set Diagonal:  <tt>A(i,i) <- alpha</tt>
//
// Set the value of the elements on the main diagonal of A to alpha.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: tri_pack_sol.cc
//!typereqs: <tt>T</tt> must be convertible to <tt>Matrix::value_type</tt>.
//!complexity: O(min(m,n)) for dense matrices, O(nnz) for sparse matrices (except envelope, which is O(m))
template <class Matrix, class T>
inline void
set_diagonal(Matrix& A, const T& alpha)
{
  typedef typename mtl::matrix_traits<Matrix>::size_type Int;
  if (! A.is_unit())
    for (Int i = 0; i < A.nrows() && i < A.ncols(); ++i)
      A(i,i) = alpha;
}


//: add absolute value
//!noindex:
struct abs_add {
  template <class T, class U>
  T operator()(const T& a, const U& b) {
    return a + std::abs(b);
  }
};

template <class Vector>
inline typename linalg_traits<Vector>::magnitude_type
oned_one_norm(const Vector& x, fast::count<0>)
{
  typedef typename
     number_traits<typename Vector::value_type>::magnitude_type T;
  return mtl_algo::accumulate(x.begin(), x.end(), T(), abs_add());
}
#if USE_BLAIS
template <class Vector, int N>
inline typename linalg_traits<Vector>::magnitude_type
oned_one_norm(const Vector& x, fast::count<N>)
{
  typedef typename
     number_traits<typename Vector::value_type>::magnitude_type T;
  return fast::accumulate(x.begin(), fast::count<N>(), T(), abs_add());
}
#endif

template <class Vector>
inline typename linalg_traits<Vector>::magnitude_type
one_norm(const Vector& x, oned_tag)
{
  return oned_one_norm(x, dim_n<Vector>::RET());
}


//: add square
//!noindex:
struct sqr_add { 
  template <class T, class U>
  T operator()(const T& a, const U& b) {
    return a + std::abs(b * b);
  }
};

template <class Vector>
inline typename linalg_traits<Vector>::magnitude_type
oned_two_norm(const Vector& x, fast::count<0>)
{
  typedef typename number_traits<typename Vector::value_type>::magnitude_type T;
  return sqrt(mtl_algo::accumulate(x.begin(), x.end(), T(), sqr_add()));
}
#if USE_BLAIS
template <class Vector, int N>
inline typename linalg_traits<Vector>::magnitude_type
oned_two_norm(const Vector& x, fast::count<N>)
{
  typedef typename number_traits<typename Vector::value_type>::magnitude_type T;
  return sqrt(fast::accumulate(x.begin(), fast::count<N>(), T(), sqr_add()));
}
#endif

//: Two Norm: <tt>s <- sqrt(sum_i(x(i)^2))</tt>
//
//  The square root of the sum of the squares of the elements of the container.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vec_two_norm.cc
//!complexity: O(n)
//!typereqs: <tt>Vector</tt> must have an associated magnitude_type that is the type of the absolute value of <tt>Vector::value_type</tt>.
//!typereqs: There must be <tt>std::abs()</tt> defined for <tt>Vector::value_type</tt>.
//!typereqs: The addition must be defined for magnitude_type.
//!typereqs: <tt>sqrt()</tt> must be defined for magnitude_type.
template <class Vector>
inline typename linalg_traits<Vector>::magnitude_type
two_norm(const Vector& x)
{
  return oned_two_norm(x, dim_n<Vector>::RET());
}

//: compare absolute values
//!noindex:
struct abs_cmp { template <class T>
bool operator()(const T& a, const T& b) {
  return std::abs(a) < std::abs(b);
}};


template <class Vec>
inline typename linalg_traits<Vec>::magnitude_type
infinity_norm(const Vec& x, oned_tag)
{
  return std::abs(*mtl_algo::max_element(x.begin(), x.end(), abs_cmp()));
}



//: used by one and inf norm
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__major_norm(const Matrix& A)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typedef matrix_traits<Matrix>::size_type Int;
  T norm = 0;
  T sum = 0;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j;
  i = A.begin();

  /* get the first sum */
  if (i != A.end()) {
    j = (*i).begin();
    sum = T(0);
    for (; j != (*i).end(); ++j)
      sum = sum + std::abs(*j);
    norm = sum;
    ++i;
  }

  for (; i != A.end(); ++i) {
    j = (*i).begin();
    if (A.is_unit() && Int(i.index()) < MTL_MIN(A.nrows(), A.ncols()))
      sum = T(1);
    else sum = T(0);

    for (; j != (*i).end(); ++j)
      sum = sum + std::abs(*j);
    norm = MTL_MAX(std::abs(norm), std::abs(sum));
  }
  return norm;
}

//: used by one and inf norm
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__minor_norm(const Matrix& A)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typedef matrix_traits<Matrix>::size_type Int;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  dense1D<T> sums(A.minor(), T());
  if (A.is_unit()) {
    for (Int x = 0; x < MTL_MIN(A.nrows(), A.ncols()); ++x)
      sums[x] = T(1);
  }

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      sums[j.index()] += std::abs(*j);
  }

  return infinity_norm(sums, oned_tag());
}


/* this handles both the major and minor norm
 for symmetric matrices */

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
symmetric_norm(const Matrix& A, row_tag)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;
  
  dense1D<T> sums(A.minor(), T(0));

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_upper()) { /* handle the diagonal elements */
      sums[j.row()] += std::abs(*j);
      ++j;
    } else
      --jend;
    for (; j != jend; ++j) {
      sums[j.row()] += std::abs(*j);
      sums[j.column()] += std::abs(*j);
    }
    if (A.is_lower())
      sums[j.row()] += std::abs(*j);
  }
  return infinity_norm(sums, oned_tag());
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
symmetric_norm(const Matrix& A, column_tag)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;
  
  dense1D<T> sums(A.minor(), T(0));

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_lower()) { /* handle the diagonal elements */
      sums[j.row()] += std::abs(*j);
      ++j;
    } else
      --jend;
    for (; j != jend; ++j) {
      sums[j.row()] += std::abs(*j);
      sums[j.column()] += std::abs(*j);
    }
    if (A.is_upper())
      sums[j.row()] += std::abs(*j);
  }
  return infinity_norm(sums, oned_tag());
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
symmetric_norm(const Matrix& A)
{
  typedef typename matrix_traits<Matrix>::orientation Orien;
  return symmetric_norm(A, Orien());
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
diagonal_one_norm(const Matrix& A)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  dense1D<T> sums(A.ncols(), T(0));

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      sums[j.column()] += std::abs(*j);
  }

  return infinity_norm(sums);
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
diagonal_infinity_norm(const Matrix& A)
{
  typedef linalg_traits<Matrix>::magnitude_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  dense1D<T> sums(A.nrows(), T(0));

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      sums[j.row()] += std::abs(*j);
  }

  return infinity_norm(sums);
}



//: dispatch function
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__one_norm(const Matrix& A, column_tag)
{
  return __major_norm(A);
}


//: dispatch function
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__one_norm(const Matrix& A, row_tag)
{
  return __minor_norm(A);
}


template <class Matrix, class Shape>
inline typename linalg_traits<Matrix>::magnitude_type
twod_one_norm(const Matrix& A, Shape)
{
  typedef typename Matrix::orientation Orien;
  return __one_norm(A, Orien());
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
twod_one_norm(const Matrix& A, symmetric_tag)
{
  return symmetric_norm(A);
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
twod_one_norm(const Matrix& A, diagonal_tag)
{
  return diagonal_one_norm(A);
}


template <class Linalg>
inline typename linalg_traits<Linalg>::magnitude_type
one_norm(const Linalg& A, twod_tag)
{
  typedef typename matrix_traits<Linalg>::shape Shape;
  return twod_one_norm(A, Shape());
}

//: One Norm:  <tt>s <- sum(|x_i|) or s <- max_j(sum_i(|A(i,j)|))</tt>
//
// For vectors, the sum of the absolute values of the elements.
// For matrices, the maximum of the column sums.
// Note: not implemented yet for unit triangle matrices.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vec_one_norm.cc
//!complexity: O(n)
//!typereqs: The vector or matrix must have an associated magnitude_type that
//   is the type of the absolute value of its <tt>value_type</tt>.
//!typereqs: There must be <tt>std::abs()</tt> defined for <tt>Vector::value_type</tt>.
//!typereqs: The addition must be defined for magnitude_type.
template <class LinalgObj>
inline typename linalg_traits<LinalgObj>::magnitude_type
one_norm(const LinalgObj& A)
{
  typedef typename linalg_traits<LinalgObj>::dimension Dim;
  return one_norm(A, Dim());
}


//: dispatch function
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__infinity_norm(const Matrix& A, row_tag)
{
  return __major_norm(A);
}

//: dispatch function
//!noindex:
template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
__infinity_norm(const Matrix& A, column_tag)
{
  return __minor_norm(A);
}

template <class Matrix, class Shape>
inline typename linalg_traits<Matrix>::magnitude_type
twod_infinity_norm(const Matrix& A, Shape)
{
  typedef typename Matrix::orientation Orien;
  return __infinity_norm(A, Orien());
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
twod_infinity_norm(const Matrix& A, symmetric_tag)
{
  return symmetric_norm(A);
}

template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
twod_infinity_norm(const Matrix& A, diagonal_tag)
{
  return diagonal_infinity_norm(A);
}


template <class Matrix>
inline typename linalg_traits<Matrix>::magnitude_type
infinity_norm(const Matrix& A, twod_tag)
{
  typedef typename matrix_traits<Matrix>::shape Shape;
  return twod_infinity_norm(A, Shape());
}


//: Infinity Norm: <tt>s <- max_i(sum_j(|A(i,j)|)) or s <- max_i(|x(i)|)</tt>
//
// For matrices, the maximum of the row sums.
// For vectors, the maximum absolute value of any of its element.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n) for vectors, O(m*n) for dense matrices, O(nnz) for sparse
//!example: vec_inf_norm.cc
//!typereqs: The vector or matrix must have an associated magnitude_type that is the type of the absolute value of its <tt>value_type</tt>.
//!typereqs: There must be <tt>std::abs()</tt> defined for <tt>Vector::value_type</tt>.
//!typereqs: The addition must be defined for magnitude_type.
template <class LinalgObj>
inline typename linalg_traits<LinalgObj>::magnitude_type
infinity_norm(const LinalgObj& A)
{
  typedef typename linalg_traits<LinalgObj>::dimension Dim;
  return infinity_norm(A, Dim());
}


//: Max Index:  <tt>i <- index of max(|x(i)|)</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n)
// The location (index) of the element with the maximum absolute value.
//!example: max_index.cc
//!typereqs: The vector or matrix must have an associated magnitude_type that
//   is the type of the absolute value of its <tt>value_type</tt>.
//!typereqs: There must be <tt>std::abs()</tt> defined for <tt>Vector::value_type</tt>.
//!typereqs: There must be a <tt>std::max()</tt> function defined for magnitude_type.
template <class Vec>
inline typename Vec::size_type
max_index(const Vec& x)
{
  typename Vec::const_iterator maxi =
    mtl_algo::max_element(x.begin(), x.end(), abs_cmp());
  return maxi.index();
}

//: Min Index:  <tt>i <- index of min(|x(i)|)</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n)
// The location (index) of the element with the minimum absolute value.
//!example: max_index.cc
//!typereqs: The vector or matrix must have an associated magnitude_type that
//   is the type of the absolute value of its <tt>value_type</tt>.
//!typereqs: There must be <tt>std::abs()</tt> defined for <tt>Vector::value_type</tt>.
//!typereqs: There must be a <tt>std::max()</tt> function defined for magnitude_type.
template<class Vec>
inline typename Vec::size_type
min_index(const Vec& x) 
{
  typename Vec::const_iterator mini = 
    mtl_algo::min_element(x.begin(), x.end(), abs_cmp());   
  return mini.index(); 
}                    


//: Max Value:  <tt>s <- max(x(i))</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vec_max.cc
//!complexity: O(n)
// Returns the value of the element with the maximum value
//!typereqs: There must be a <tt>std::max()</tt> function defined for magnitude_type.
template <class Vec>
inline typename Vec::value_type
max(const Vec& x)
{
  return *mtl_algo::max_element(x.begin(), x.end());
}



//: Min Value:  <tt>s <- min(x_i)</tt>
//!category: algorithms
//!component: function
//!complexity: O(n)
//!definition: mtl.h
//!typereqs: There must be a <tt>std::min()</tt> function defined for <tt>Vec::value_type</tt>.
template <class Vec>
inline typename Vec::value_type
min(const Vec& x)
{
  return *mtl_algo::min_element(x.begin(), x.end());
}


//: Givens Plane Rotation
//!category: functors
//!component: type
//!definition: mtl.h
//!example: apply_givens.cc
//
// Input a and b to the constructor to create a givens plane rotation
// object. Then apply the rotation to two vectors. There is a
// specialization of the givens rotation for complex numbers.
//
// <codeblock>
// [  c  s ] [ a ] = [ r ]
// [ -s  c ] [ b ]   [ 0 ]
// </codeblock>
//
//!typereqs: the addition operator must be defined for <tt>T</tt>
//!typereqs: the multiplication operator must be defined for <tt>T</tt>
//!typereqs: the division operator must be defined for <tt>T</tt>
//!typereqs: the std::abs() function must be defined for <tt>T</tt>
template <class T>
class givens_rotation {
public:

  //: Default constructor
  inline givens_rotation() : a_(0), b_(0), c_(0), s_(0) { }

  //: Givens Plane Rotation Constructor
  inline givens_rotation(T a_in, T b_in) {
    T roe;
    if (std::abs(a_in) > std::abs(b_in))
      roe = a_in;
    else
      roe = b_in;
    
    T scal = std::abs(a_in) + std::abs(b_in);
    T r, z;
    if (scal != T(0)) {
      T a_scl = a_in / scal;
      T b_scl = b_in / scal;
      r = scal * sqrt(a_scl * a_scl + b_scl * b_scl);
      if (roe < T(0)) r *= -1;
      c_ = a_in / r;
      s_ = b_in / r;
      z = 1;
      if (std::abs(a_in) > std::abs(b_in))
	z = s_;
      else if (std::abs(b_in) >= std::abs(a_in) && c_ != T(0))
	z = T(1) / c_;
    } else {
      c_ = 1; s_ = 0; r = 0; z = 0;      
    }
    a_ = r;
    b_ = z;
  }

  inline void set_cs(T cin, T sin) { c_ = cin; s_ = sin; }

  //: Apply plane rotation to two vectors.
  template <class VecX, class VecY>
  inline void apply(VecX x, VecY y) MTL_THROW_ASSERTION {
    MTL_ASSERT(x.size() <= y.size(), "vec::givens_rotation::apply()");

    typename VecX::iterator xi = x.begin();
    typename VecX::iterator xend = x.end();
    typename VecY::iterator yi = y.begin();

    while (mtl::not_at(xi, xend)) {
      apply(*xi, *yi);
      ++xi; ++yi;
    }
  }
  //: Apply plane rotation to two real scalars.
  inline void apply(T& x, T& y) {
    T tmp = c_ * x + s_ * y;
    y = c_ * y - s_ * x;
    x = tmp;
  }

  inline T a() { return a_; }
  inline T b() { return b_; }
  inline T c() { return c_; }
  inline T s() { return s_; }

protected:
  T a_, b_, c_, s_;
};

//:  The specialization for complex numbers.
//!category: functors
//!component: type
template <class T>
class givens_rotation < std::complex<T> > {
public:
  //:
  inline givens_rotation() : cs(0), sn(0) { }
  //:
  inline givens_rotation(std::complex<T> a_in, std::complex<T> b_in) {
    double a = std::abs(a_in), b = std::abs(b_in);
    double length = sqrt(a*a+b*b);
    cs = a_in / T(length);
    sn = b_in / T(length);
  }
  //:  Apply plane rotation to two vectors.
  template <class VecX, class VecY>
  inline void apply(VecX x, VecY y) MTL_THROW_ASSERTION {
    MTL_ASSERT(x.size() <= y.size(), "vec::givens_rotation::apply()");

    typename VecX::iterator xi = x.begin();
    typename VecX::iterator xend = x.end();
    typename VecY::iterator yi = y.begin();
    
    while (mtl::not_at(xi, xend)) {
      apply(*xi, *yi);
      ++xi; ++yi;
    }
  }
  //: Apply plane rotation to two complex scalars.
  inline void apply(std::complex<T>& x, std::complex<T>& y) {
    complex<T> temp  =  conj(cs) * x + conj(sn) * y;
    y = cs * y - sn * x; 
    x = temp;
  }

protected:
  std::complex<T> cs;
  std::complex<T> sn;
};

//: Modified Givens Transformation
//!category: functors
//!component: type
//  
//  This class is under construction.  Like the givens rotation class,
//  there will be a real and complex class.
template <class T>
class modified_givens {
  // Placeholder: no members are defined yet.

};



//: Transpose in Place:  <tt>A <- A^T</tt>
// Currently this algorithm only applies to square dense matrices
// Plan to include all rectangular dense matrices..
//!category: algorithms
//!component: function
//!definition: mtl.h
template <class Matrix>
inline void
transpose(Matrix& A) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() == A.ncols(), "mat::transpose()");
  typedef typename matrix_traits<Matrix>::value_type T;
  typedef typename mtl::matrix_traits<Matrix>::size_type Int;
  for (Int i = 0; i < A.nrows(); ++i)
    for (Int j = i; j < A.ncols(); ++j) {
      T tmp = A(i, j);
      A(i, j) = A(j, i);
      A(j, i) = tmp;
    }
}


//: Transpose: <tt>B <- A^T</tt>
//!precond:  <tt> B(i,j) = 0 & B = A^T </tt>
//
//  When matrix B is banded, it is up to the user to ensure
//  that the bandwidth is sufficient to contain the elements
//  from A^T. If there are elements of A^T that do not
//  fall within the bandwidth, an exception will be thrown.
//  (exception not implemented yet).
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n^2)
template <class MatA, class MatB>
inline void
transpose(const MatA& A, MatB B) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= B.ncols(), "matmat::transpose()");
  MTL_ASSERT(A.ncols() <= B.nrows(), "matmat::transpose()");

  typename MatA::const_iterator i;
  typename MatA::OneD::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      B(j.column(), j.row()) = *j;
  }
}


/*
  This version of the algorithm depends on the compiler
  hoisting the reference of z[j.row()] out of the inner loop
  (for the row major case)
  KCC doesn't do this, and neither does the underlying Sun C
  compiler.

  In order to hoist the reference by hand, I'll have to write
  specializations for column major matrix and for row major matrices.
  While I'm at it I'll do the unrolling stuff too.

*/

/* this is generic
 */
template <class Matrix, class VecX, class VecZ, class Shape>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z,
       Shape) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= y.size(), "matvec::mult()");
  MTL_ASSERT(A.nrows() <= z.size(), "matvec::mult()");
  MTL_ASSERT(A.ncols() <= x.size(), "matvec::mult()");
  typedef typename matrix_traits<Matrix>::value_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j != jend; ++j)
      z[j.row()] += *j * x[j.column()];
  }
}

/* this is fast
 */
template <class Matrix, class VecX, class VecZ>
inline void
rect_mult(const Matrix& A, const VecX& x, VecZ z,
	  dense_tag, row_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= y.size(), "matvec::mult()");
  MTL_ASSERT(A.nrows() <= z.size(), "matvec::mult()");
  MTL_ASSERT(A.ncols() <= x.size(), "matvec::mult()");
  typedef typename matrix_traits<Matrix>::value_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  for (i = A.begin(); i < A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    T tmp = z[j.row()];
    for (; j < (*i).end(); ++j)
      tmp += *j * x[j.column()];
    z[j.row()] = tmp;
  }
}


/*
  This is slow

 */
template <class Matrix, class VecX, class VecZ>
inline void
rect_mult(const Matrix& A, const VecX& x, VecZ z,
	  dense_tag, column_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= y.size(), "matvec::mult()");
  MTL_ASSERT(A.nrows() <= z.size(), "matvec::mult()");
  MTL_ASSERT(A.ncols() <= x.size(), "matvec::mult()");
  typedef typename matrix_traits<Matrix>::value_type T;
  typedef typename matrix_traits<Matrix>::size_type Int;

  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  for (i = A.begin(); i < A.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    for (; j < (*i).end(); ++j)
      z[j.row()] += *j * x[j.column()];
  }
}

template <class Matrix, class VecX, class VecZ, class Orien>
inline void
rect_mult(const Matrix& A, const VecX& x, VecZ z,
	  sparse_tag, Orien) MTL_THROW_ASSERTION
{
  __mult(A, x, z, banded_tag()); /* just call the generic mult */
}

template <class Matrix, class VecX, class VecZ>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z,
       rectangle_tag) MTL_THROW_ASSERTION
{
  typedef typename matrix_traits<Matrix>::sparsity Sparsity;
  typedef typename matrix_traits<Matrix>::orientation Orien;

  rect_mult(A, x, z, Sparsity(), Orien());
}

template <class Matrix, class VecX, class VecZ>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z, 
       triangle_tag)
{
  __mult(A, x, z, rectangle_tag());
  if (A.is_unit()) {
    /* actually, this still isn't quite right,
       should do
       add_n(x, z, z, min(A.nrows(), A.ncols()));
       instead
       */
    if (z.size() <= x.size())
      mtl::add(z, x, z);
    else
      mtl::add(x, z, z);      
  }
}

// Symmetric matrix-vector accumulate for row-oriented storage.
// Only one triangle is stored, so every off-diagonal element contributes
// twice: once to the entry of z for the current slice (accumulated in tmp)
// and once to the mirrored entry z[j.column()].
template <class Matrix, class VecX, class VecZ>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z, 
       symmetric_tag, row_tag)
{
  typedef typename matrix_traits<Matrix>::value_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    /* running value for the entry of z that belongs to this slice */
    T tmp = z[i.index()];
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_upper()) {
      /* first stored element is treated as the diagonal: counted once */
      tmp += *j * x[j.column()];
      ++j;
    } else
      /* otherwise the last stored element is held back for after the loop */
      --jend;
    for (; j != jend; ++j) {
      /* normal side */
      tmp += *j * x[j.column()];
      /* symmetric side */
      z[j.column()] += *j * x[j.row()];
    }
    /* j == jend here; for is_lower() that is the element held back above */
    if (A.is_lower())
      tmp += *j * x[j.column()];
    z[i.index()] = tmp;
  }
}

// Symmetric matrix-vector accumulate for column-oriented storage.
// Only one triangle is stored, so every off-diagonal element contributes
// twice: once directly to z[j.row()] and once, through tmp, to the entry
// of z for the current slice.
template <class Matrix, class VecX, class VecZ>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z, 
       symmetric_tag, column_tag)
{
  typedef typename matrix_traits<Matrix>::value_type T;
  typename Matrix::const_iterator i;
  typename Matrix::OneD::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    /* mirrored contributions for this slice, added to z at the end */
    T tmp = T(0);
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_lower()) {
      /* first stored element is treated as the diagonal: counted once */
      z[j.column()] += *j * x[j.column()];
      ++j;
    } else
      /* otherwise the last stored element is held back for after the loop */
      --jend;
    for (; j != jend; ++j) {
      /* normal side */
      z[j.row()] += *j * x[j.column()];
      /* symmetric side */
      tmp += *j * x[j.row()];
    }
    /* j == jend here; for is_upper() that is the element held back above */
    if (A.is_upper())
      tmp += *j * x[j.row()];      
    z[i.index()] += tmp;
  }
}


template <class Matrix, class VecX, class VecZ>
inline void
__mult(const Matrix& A, const VecX& x, VecZ z, 
       symmetric_tag)
{
  typedef typename matrix_traits<Matrix>::orientation Orien;
  __mult(A, x, z, symmetric_tag(), Orien());
}

//: Multiplication:  <tt>z <- A x + y</tt>
//!category: algorithms
//!component: function 
//!definition: mtl.h
//!precond:  <TT>A.nrows() <= y.size()</TT>
//!precond:  <TT>A.nrows() <= z.size()</TT>
//!precond:  <TT>A.ncols() <= x.size()</TT>
//!precond:  no aliasing in the arguments
//!example: symm_sparse_vec_prod.cc
//!typereqs: <tt>Matrix::value_type</tt>, <tt>VecX::value_type</tt>, <tt>VecY::value_type</tt>, and <tt>VecZ::value_type</tt> must be the same type
//!typereqs: the multiplication operator must be defined for <tt>Matrix::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>Matrix::value_type</tt>
template <class Matrix, class VecX, class VecY, class VecZ>
inline void
mult(const Matrix& A, const VecX& x, const VecY& y, VecZ z) MTL_THROW_ASSERTION
{
  mtl::copy(y, z);
  typedef typename matrix_traits<Matrix>::shape Shape;
  __mult(A, x, z, Shape());
}


//: Matrix Vector Multiplication:  <tt>y <- A x</tt>
//
// Multiplies matrix A times vector x and stores the result in vector y.
// <p>
// Note: ignore the <tt>oned_tag</tt> parameter and the underscores in
// the name of this function.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: general_matvec_mult.cc, banded_matvec_mult.cc, symm_matvec_mult.cc
//!precond:  <TT>A.nrows() <= y.size()</TT>
//!precond:  <TT>A.ncols() <= x.size()</TT>
//!precond:  x and y not same vector
//!example: symm_matvec_mult.cc
//!typereqs: <tt>Matrix::value_type</tt>, <tt>VecX::value_type</tt>, and <tt>VecY::value_type</tt> must be the same type
//!typereqs: the multiplication operator must be defined for <tt>Matrix::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>Matrix::value_type</tt>
template <class Matrix, class VecX, class VecY>
inline void
__mult(const Matrix& A, const VecX& x, VecY y, oned_tag) MTL_THROW_ASSERTION
{
  mult(A, x, scaled(y, 0), y);
}

template <class Matrix, class VecX, class VecY>
inline void
mult_add(const Matrix& A, const VecX& x, VecY y) MTL_THROW_ASSERTION
{
  typedef typename matrix_traits<Matrix>::shape Shape;
  __mult(A, x, y, Shape());
}


//: simple 3 loop version of matmat mult
//!noindex:
template <class MatA, class MatB, class MatC>
inline void
simple_mult(const MatA& A, const MatB& B, MatC& C, dense_tag)
{
  typedef typename matrix_traits<MatA>::size_type Int;
  typename MatA::const_iterator A_k;
  typename MatA::OneD::const_iterator A_ki;

  A_k = A.begin();
  while (not_at(A_k, A.end())) {
    for (Int j = 0; j < B.ncols(); ++j) {
      A_ki = (*A_k).begin();
      while (not_at(A_ki, (*A_k).end())) {
	Int k = A_ki.column();
	Int i = A_ki.row();
	C(i,j) += *A_ki * B(k,j);
        ++A_ki;
      }
    }
    ++A_k;
  }
}


template <class MatrixA, class MatrixB, class MatrixC>
inline void
simple_mult(const MatrixA& A, const MatrixB& B, MatrixC& C, sparse_tag)
{
  typedef typename matrix_traits<MatrixA>::value_type T;
  typedef typename matrix_traits<MatrixA>::size_type Int;
  T scal;
  Int len = 0;
  Int jj, k;
  Int nzmax = C.capacity();

  dense1D<Int> ic(A.nrows() + 1, 0);
  dense1D<Int> jc(nzmax);
  dense1D<T> c(nzmax);
  
  typedef typename dense1D<Int>::iterator di_iter;
  typedef typename dense1D<T>::iterator dt_iter;
  
  compressed1D<T> tmp1, tmp2, tmp3;
  tmp1.reserve(B.ncols());
  tmp2.reserve(B.ncols());
  
  typedef typename compressed1D<T>::iterator tmpiter;
  
  typename MatrixA::const_iterator Ai;
  typename MatrixA::Row::const_iterator Aij;
  typename MatrixB::Row::const_iterator Bij;
  typename MatrixB::Row::const_iterator Bijend;
  
  for (Ai = A.begin(); Ai != A.end(); ++Ai) {
    for (Aij = (*Ai).begin(); Aij != (*Ai).end(); ++Aij) {
      scal = *Aij;
      jj = Aij.column();
      // add B[jj] and tmp1 into tmp2
      add(scaled(B[jj], scal), tmp1, tmp2);
      tmp1.resize(0);
      // swap tmp1 and tmp2
      tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
    }
    // copy tmp1 into C[ii]
    k = len;
    if (k + tmp1.size() > nzmax) {
      cerr << "Not enough work space, increase nzmax" << endl;
      return;
    }
    for (tmpiter t = tmp1.begin(); t != tmp1.end(); ++t, ++k) {
      c[k] = *t;
      jc[k] = t.index();
    }
    
    len += tmp1.size();
    ic[Ai.index() + 1] = len;
    tmp1.clear();
  }
  
  typedef matrix<T, rectangle<>, 
                 compressed<Int, external>, 
                 row_major>::type  SpMat;
  SpMat CC(A.nrows(), B.ncols(), len, c.data(), ic.data(), jc.data());
  copy(CC, C);
}


//: Symmetric version, row-major
//!noindex:
//  Accumulates into C the product of A and B, walking A row by row
//  through its stored elements.  Each element visited by the inner
//  while-loop is applied twice: once at (i,k) and once mirrored at
//  (k,i).  The remaining element of the row (the first one when
//  A.is_upper(), the last one when A.is_lower()) is treated as the
//  diagonal and applied once.
template <class MatA, class MatB, class MatC>
inline void
symm_simple_mult(const MatA& A, const MatB& B, MatC& C, row_tag)
{
  typedef typename matrix_traits<MatA>::size_type Int;
  typename MatA::const_iterator A_k;
  typename MatA::OneD::const_iterator A_ki, A_kiend;

  A_k = A.begin();
  while (not_at(A_k, A.end())) {
    /* the row of A is re-traversed for every column j of B */
    for (Int j = 0; j < B.ncols(); ++j) {
      A_ki = (*A_k).begin();
      A_kiend = (*A_k).end();

      Int k = A_ki.column();
      Int i = A_ki.row();

      if (A.is_upper()) { /* handle the diagonal elements */
	C(i,j) += *A_ki * B(k,j);
	++A_ki;
      } else
	--A_kiend; /* leave the last element for the is_lower() step below */

      /* off-diagonal elements: apply the element and its mirror image */
      while (not_at(A_ki, A_kiend)) {
	k = A_ki.column();
	i = A_ki.row();
	C(i,j) += *A_ki * B(k,j);
	C(k,j) += *A_ki * B(i,j);
        ++A_ki;
      }
      /* A_ki now equals A_kiend; it is only dereferenced when A is lower,
         where A_kiend was moved back onto the last stored element */
      k = A_ki.column();
      i = A_ki.row();
      if (A.is_lower())
	C(i,j) += *A_ki * B(k,j);

    }
    ++A_k;
  }
}

//: Symmetric version, column-major
//!noindex:
//  Accumulates into C the product of A and B, walking A column by
//  column through its stored elements.  Each element visited by the
//  inner while-loop is applied twice: once at (i,k) and once mirrored
//  at (k,i).  The remaining element of the column (the first one when
//  A.is_lower(), the last one when A.is_upper()) is treated as the
//  diagonal and applied once.
template <class MatA, class MatB, class MatC>
inline void
symm_simple_mult(const MatA& A, const MatB& B, MatC& C, column_tag)
{
  typedef typename matrix_traits<MatA>::size_type Int;
  typename MatA::const_iterator A_k;
  typename MatA::OneD::const_iterator A_ki, A_kiend;

  A_k = A.begin();
  while (not_at(A_k, A.end())) {
    /* the column of A is re-traversed for every column j of B */
    for (Int j = 0; j < B.ncols(); ++j) {
      A_ki = (*A_k).begin();
      A_kiend = (*A_k).end();

      Int k = A_ki.column();
      Int i = A_ki.row();

      if (A.is_lower()) { /* handle the diagonal elements */
	C(i,j) += *A_ki * B(k,j);
	++A_ki;
      } else
	--A_kiend; /* leave the last element for the is_upper() step below */

      /* off-diagonal elements: apply the element and its mirror image */
      while (not_at(A_ki, A_kiend)) {
	k = A_ki.column();
	i = A_ki.row();
	C(i,j) += *A_ki * B(k,j);
	C(k,j) += *A_ki * B(i,j);
        ++A_ki;
      }
      /* A_ki now equals A_kiend; it is only dereferenced when A is upper,
         where A_kiend was moved back onto the last stored element */
      k = A_ki.column();
      i = A_ki.row();
      if (A.is_upper())
	C(i,j) += *A_ki * B(k,j);
    }

    ++A_k;
  }
}


//: Specialization for symmetric matrices
//!noindex:
//  Selects the row- or column-major symm_simple_mult from the
//  orientation of A.
template <class MatA, class MatB, class MatC>
inline void
matmat_mult(const MatA& A, const MatB& B, MatC& C, symmetric_tag)
{
  typedef typename matrix_traits<MatA>::orientation Orien;
  symm_simple_mult(A, B, C, Orien());
}

//: Specialization for triangular matrices
//!noindex:
//  When A.is_unit(), B is first added to the leading
//  min(A.nrows(), A.ncols()) rows of C (the contribution of a diagonal
//  of ones); the elements reachable through A's iterators are then
//  handled by the dense simple_mult.
template <class MatA, class MatB, class MatC>
inline void
matmat_mult(const MatA& A, const MatB& B, MatC& C, triangle_tag)
{
  typedef typename matrix_traits<MatA>::size_type Int;
  if (A.is_unit()) {
    Int M = MTL_MIN(A.nrows(), A.ncols());
    Int N = B.ncols();
    /* C(i,j) += 1 * B(i,j) for every diagonal position i */
    for (Int i = 0; i < M; ++i)
      for (Int j = 0; j < N; ++j)
	C(i,j) += B(i,j);
  }

  simple_mult(A, B, C, mtl::dense_tag());
}

//: Dispatch to row/column general and banded matrices
//!noindex:
template <class MatA, class MatB, class MatC, class Shape>
inline void
matmat_mult(const MatA& A, const MatB& B, MatC& C, Shape)
{
  typedef typename matrix_traits<MatA>::sparsity Sparsity;
  simple_mult(A, B, C, Sparsity());
}


//: Matrix multiplication  C <- C + A * B
//
//  The actual specialization of the algorithm used depends on the
//  types of matrices used. If all the matrices are dense and
//  rectangular the blocked algorithm is used (when --with-blais is
//  specified in the configure). Otherwise the traversal depends on
//  matrix A. Therefore if one is multiplying a sparse matrix by a
//  dense, one would want the sparse matrix as the A
//  argument. Typically, for performance reasons, one would not want
//  to use a sparse matrix for C.
//  <p>
//  Note: ignore the <tt>twod_tag</tt> argument and the underscores in
//  the name of this function.
//
//!precond: <tt>A.nrows() == C.nrows()</tt>
//!precond: <tt>A.ncols() == B.nrows()</tt>
//!precond: <tt>B.ncols() == C.ncols()</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!typereqs: the value types for each of the matrices must be compatible
//!typereqs: the multiplication operator must be defined for <tt>MatA::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>MatA::value_type</tt>
template <class MatA, class MatB, class MatC>
inline void
__mult(const MatA& A, const MatB& B, MatC& C, twod_tag)
{
  /* the shape tag of A chooses the matmat_mult overload */
  matmat_mult(A, B, C, typename MatA::shape());
}


//: Dispatch between matrix matrix and matrix vector mult.
//!noindex:
template <class LinalgA, class LinalgB, class LinalgC>
inline void
mult(const LinalgA& A, const LinalgB& B, LinalgC& C)
{
  typedef typename linalg_traits<LinalgB>::dimension Dim;
  __mult(A, B, C, Dim());
}

//: for column oriented
//!noindex:
//  In-place triangular solve, one column of T at a time.  For each
//  column j, x[j] is divided by the diagonal entry (skipped when
//  T.is_unit()) and x[j] times each remaining entry of the column is
//  subtracted from the corresponding x[i].  Columns are visited last
//  to first for an upper T and first to last for a lower T.
//  NOTE(review): x is taken by value; the result reaches the caller only
//  if copies of VecX share storage -- confirm for the vector types used.
template <class TriMatrix, class VecX>
inline void
__tri_solve(const TriMatrix& T, VecX x, column_tag)
{
  typedef typename matrix_traits<TriMatrix>::size_type Int;
  typedef typename matrix_traits<TriMatrix>::value_type VT;
  typename VecX::value_type x_j; 

  if (T.is_upper()) {
    typename TriMatrix::const_reverse_iterator T_j; 
    typename TriMatrix::Column::const_reverse_iterator T_ji, T_jrend;

    for (T_j = T.rbegin(); T_j != T.rend(); ++T_j) {
      /* walk the column backwards, so the first element seen is taken
         as the diagonal */
      T_ji = (*T_j).rbegin();
      T_jrend = (*T_j).rend();
      Int j = T_ji.column();
      
      if (! T.is_unit()) {
	x[j] /= *T_ji; /* the diagonal */
	++T_ji;
      }
      x_j = x[j];

      /* eliminate x[j] from the remaining entries of this column */
      while (T_ji != T_jrend) {
	Int i = T_ji.row();
	x[i] -= x_j * *T_ji;
	++T_ji;
      }
    }
  } else {			/* T is lower */
    typename TriMatrix::const_iterator T_j; 
    typename TriMatrix::Column::const_iterator T_ji, T_jend;

    for (T_j = T.begin(); T_j != T.end(); ++T_j) {
      /* walk the column forwards, so the first element seen is taken
         as the diagonal */
      T_ji = (*T_j).begin();
      T_jend = (*T_j).end();
      Int j = T_ji.column();
      
      if (! T.is_unit()) {
	x[j] /= *T_ji; /* the diagonal */
	++T_ji;
      }
      x_j = x[j];
      
      /* eliminate x[j] from the remaining entries of this column */
      while (T_ji != T_jend) {
	Int i = T_ji.row();
	x[i] -= x_j * *T_ji;
	++T_ji;
      }
    }
  }    
}

//: for row major
//!noindex:
//  In-place triangular solve, one row of T at a time.  For each row i
//  the products of the off-diagonal entries with the matching x[j] are
//  subtracted from x[i]; unless T.is_unit() the result is then divided
//  by the entry the inner loop stops on, which is taken as the
//  diagonal.  Rows are visited last to first for an upper T and first
//  to last for a lower T.
//  NOTE(review): x is taken by value; the result reaches the caller only
//  if copies of VecX share storage -- confirm for the vector types used.
template <class TriMatrix, class VecX>
inline void
__tri_solve(const TriMatrix& T, VecX x, row_tag)
{
  typedef typename matrix_traits<TriMatrix>::value_type VT;
  typedef typename matrix_traits<TriMatrix>::size_type Int;

  if (T.is_upper()) {
    typename TriMatrix::const_reverse_iterator T_i; 
    typename TriMatrix::Row::const_reverse_iterator T_ij;

    T_i = T.rbegin();
    /* the first row visited is handled here when the diagonal is stored:
       only the division by its last element is applied */
    if (! T.is_unit()) {
      T_ij = (*T_i).rbegin();
      x[T_ij.row()] /= *T_ij;
      ++T_i;
    }

    while (T_i != T.rend()) {
      T_ij = (*T_i).rbegin();
      Int i = T_ij.row();
      VT t = x[i];

      typename TriMatrix::Row::const_reverse_iterator T_iend;
      T_iend = (*T_i).rend();
      /* stop one element early so T_ij ends up on the diagonal */
      if (! T.is_unit())
	--T_iend;

      Int j;
      while (T_ij != T_iend) {
	j = T_ij.column();
	t -= (*T_ij) * x[j];	  
	++T_ij;
      }
      if (!T.is_unit())
	t /= *T_ij;
	
      x[i] = t;

      ++T_i;
    }
  } else { /* T is lower */

    typename TriMatrix::const_iterator T_i; 
    typename TriMatrix::Row::const_iterator T_ij;

    T_i = T.begin();

    /* the first row is handled here when the diagonal is stored:
       x is scaled by the reciprocal of the row's first element */
    if (! T.is_unit()) {
      T_ij = (*T_i).begin();
      x[T_ij.row()] *= VT(1) / *T_ij;
      ++T_i;
    }

    while (T_i != T.end()) {
      T_ij = (*T_i).begin();
      Int i = T_ij.row();
      VT t = x[i];

      typename TriMatrix::Row::const_iterator T_iend;
      T_iend = (*T_i).end();
      /* stop one element early so T_ij ends up on the diagonal */
      if (! T.is_unit())
	--T_iend;

      Int j;
      while (T_ij != T_iend) {
	j = T_ij.column();
	t -= (*T_ij) * x[j];
	++T_ij;
      }
      if (!T.is_unit())
	t /= *T_ij;

      x[i] = t;
      ++T_i;
    }
  }
}


//: Triangular Solve:  <tt>x <- T^{-1} * x</tt>
//  Use with triangular matrices only, i.e. use the <TT>triangle</TT>
//  adaptor class.
//
//  To use with a sparse matrix, the sparse matrix must be wrapped with
//  a triangle adaptor. You must specify "packed" in the triangle
//  adaptor. The sparse matrix must only have elements in the correct
//  side.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: tri_solve.cc
//!typereqs: <tt>Matrix::value_type</tt> and <tt>VecX::value_type</tt> must be the same type
//!typereqs: the multiplication operator must be defined for <tt>Matrix::value_type</tt>
//!typereqs: the division operator must be defined for <tt>Matrix::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>Matrix::value_type</tt>
template <class TriMatrix, class VecX>
inline void
tri_solve(const TriMatrix& T, VecX x) MTL_THROW_ASSERTION
{
  /* x must cover both dimensions of T, and T must be square */
  MTL_ASSERT(T.nrows() <= x.size(), "matvec::tri_solve()");
  MTL_ASSERT(T.ncols() <= x.size(), "matvec::tri_solve()");
  MTL_ASSERT(T.ncols() == T.nrows(), "matvec::tri_solve()");

  /* the orientation tag of T picks the row- or column-wise solver */
  __tri_solve(T, x, typename TriMatrix::orientation());
}




//: Tag for tri_solve: selects the right-side __tri_solve overload
//!noindex:
class right_side { };

//: Tag for tri_solve: selects the left-side __tri_solve overload
//!noindex:
class left_side { };

//: tri solve for left side
//!noindex:
//  Unoptimized: every column of B is passed in turn to the vector
//  version of tri_solve together with T.
template <class MatT, class MatB>
inline void
__tri_solve(const MatT& T, MatB& B, left_side)
{
  /*  const int M = B.nrows(); */
  const int N = B.ncols();

  /* unoptimized version */
  for (int j = 0; j < N; ++j)
    mtl::tri_solve(T, columns(B)[j]);


  /* JGS need to do an optimized version of this
  if (T.is_upper()) {
    for (int k = M-1; k > 0; --k) {
      if (B(k,j) != 0) {
        if (! T.is_unit())
          B(k,j) /= T(k,k);
        for (int i = 0; i < k; ++i)
          B(i,j) -= B(k,j) * T(i,k);
      }
    }
  } else {
    for (int j = 0; j < N; ++j)
      for (int k = 0; k < M; ++k) {
        if (B(k,j) != 0) {
          if (! T.is_unit())
            B(k,j) /= T(k,k);
          for (int i = k; i < M; ++i)
            B(i,j) -= B(k,j) * T(i,k);
        }
      }
  }
  */
}


/* JGS untested!!! */

//: tri solve for right side
//!noindex:
//  Solves X*T = B for X, overwriting B with X, one column of B at a
//  time.  Column j of the result is
//     ( B(:,j) - sum_k X(:,k) * T(k,j) ) / T(j,j)
//  where k runs over the columns already solved: k < j for an upper T
//  (columns visited first to last) and k > j for a lower T (columns
//  visited last to first).  The division is skipped when T.is_unit().
template <class MatT, class MatB>
inline void
__tri_solve(const MatT& T, MatB& B, right_side)
{
  const int M = B.nrows();
  const int N = B.ncols();
  typedef typename MatT::PR PR;

  if (T.is_upper()) {
    for (int j = 0; j < N; ++j) {
      /* remove the contribution of the columns solved so far */
      for (int k = 0; k < j; ++k)
        if (T(k,j) != PR(0))
          for (int i = 0; i < M; ++i)
            B(i,j) -=  T(k,j) * B(i,k);
      if (! T.is_unit()) {
        PR tmp = PR(1) / T(j,j);
        /* every row, including row 0, is scaled by the diagonal */
        for (int i = 0; i < M; ++i)
          B(i,j) = tmp * B(i,j);
      }
    }
  } else { // T is lower
    /* column 0 must be solved too, hence j >= 0 */
    for (int j = N - 1; j >= 0; --j) {
      /* only the strictly-lower part (k > j) refers to solved columns;
         the diagonal is applied by the scaling step below */
      for (int k = j + 1; k < N; ++k)
        if (T(k,j) != PR(0))
          for (int i = 0; i < M; ++i)
            B(i,j) -=  T(k,j) * B(i,k);
      if (! T.is_unit()) {
        PR tmp = PR(1) / T(j,j);
        for (int i = 0; i < M; ++i)
          B(i,j) = tmp * B(i,j);
      }
    }
  }
}

//: Triangular Solve: <tt>B <- A^{-1} * B  or  B <- B * A^{-1}</tt>
//
//  This solves the equation \TEX{T*X = B} or \TEX{X*T = B} where T
//  is an upper or lower triangular matrix, and B is a general
//  matrix. The resulting matrix X is written onto matrix B. The first
//  equation is solved if {\tt left_side} is specified. The second
//  equation is solved if {\tt right_side} is specified.
//
//  Currently only works with dense storage format.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n^3)
//!example: matmat_trisolve.cc
//!typereqs: <tt>MatT::value_type</tt> and <tt>MatB::value_type</tt> must be the same type
//!typereqs: the multiplication operator must be defined for <tt>MatT::value_type</tt>
//!typereqs: the division operator must be defined for <tt>MatT::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>MatT::value_type</tt>
template <class MatT, class MatB, class Side>
inline void
tri_solve(const MatT& T, MatB& B, Side s)
{
  /* the type of s (left_side or right_side) picks the __tri_solve overload */
  __tri_solve(T, B, s);
}





//: Rank One Update:   <tt>A <- A  +  x * y^T</tt>
//
// Also known as the outer product of two vectors.
// <codeblock>
//       y = [ 1  2  3 ]
//
//     [ 1 ] [ 1  2  3 ]
// x = [ 2 ] [ 2  4  6 ] => A
//     [ 3 ] [ 3  6  9 ]
//     [ 4 ] [ 4  8 12 ]
// </codeblock>
// <p>
// When using this algorithm with a symmetric matrix, x and y
// must be the same vector, or at least have the same values.
// Otherwise the resulting matrix is not symmetric.
//
//!precond:  <TT>A.nrows() <= x.size()</TT>
//!precond:  <TT>A.ncols() <= y.size()</TT>
//!precond: A has rectangle shape and is dense
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: rank_one.cc
//!typereqs: <tt>Matrix::value_type</tt>, <tt>VecX::value_type</tt>, and <tt>VecY::value_type</tt> must be the same type
//!typereqs: the multiplication operator must be defined for <tt>Matrix::value_type</tt>
//!typereqs: the addition operator must be defined for <tt>Matrix::value_type</tt>
template <class Matrix, class VecX, class VecY>
inline void
rank_one_update(Matrix A, const VecX& x, const VecY& y) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= x.size(), "matvec::rank_one_update()");
  MTL_ASSERT(A.ncols() <= y.size(), "matvec::rank_one_update()");

  typename Matrix::iterator slice;
  typename Matrix::OneD::iterator elt, last;

  /* visit every stored element and add x(row) * conj(y(column)) to it */
  slice = A.begin();
  while (slice != A.end()) {
    elt = (*slice).begin();
    last = (*slice).end();
    while (elt != last) {
      *elt += x[elt.row()] * conj(y[elt.column()]);
      ++elt;
    }
    ++slice;
  }
}



/* 1. how will the scaling by alpha work into this
 * 2. is my placement of conj() ok with respect
 *    to both row and column oriented matrices
 * 3. Perhaps split this in two, have diff version for complex
 */

//: Rank Two Update:  <tt>A <- A  +  x * y^T  +  y * x^T</tt>
//
//
//!category: algorithms
//!component: function
//!precond:   <TT>A.nrows() == A.ncols()</TT>
//!precond:   <TT>A.nrows() == x.size()</TT>
//!precond:   <TT>x.size() == y.size()</TT>
//!precond: A has rectangle shape and is dense.
//!definition: mtl.h
//!example: rank_2_symm_sparse.cc
//!typereqs: <tt>Matrix::value_type</tt>, <tt>VecX::value_type</tt>, and <tt>VecY::value_type</tt> must be the same type.
//!typereqs: The multiplication operator must be defined for <tt>Matrix::value_type</tt>.
//!typereqs: The addition operator must be defined for <tt>Matrix::value_type</tt>.
template <class Matrix, class VecX, class VecY>
inline void
rank_two_update(Matrix A, const VecX& x, const VecY& y) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() == A.ncols(), "matvec::rank_two_update()");
  MTL_ASSERT(A.nrows() <= x.size(), "matvec::rank_two_update()");
  MTL_ASSERT(A.nrows() <= y.size(), "matvec::rank_two_update()");

  typename Matrix::iterator slice;
  typename Matrix::OneD::iterator elt, last;

  /* every stored element receives x(r)*conj(y(c)) + y(r)*conj(x(c)) */
  slice = A.begin();
  while (slice != A.end()) {
    elt = (*slice).begin();
    last = (*slice).end();
    while (elt != last) {
      *elt += x[elt.row()]*conj(y[elt.column()]) + y[elt.row()]*conj(x[elt.column()]);
      ++elt;
    }
    ++slice;
  }
}

// Vector copy for the fast::count<0> tag: the whole range
// [x.begin(), x.end()) is handed to mtl_algo::copy, writing from y.begin().
template <class VecX, class VecY>
inline void
__copy(const VecX& x, VecY y, fast::count<0>)
{
  mtl_algo::copy(x.begin(), x.end(), y.begin());
}  
#if USE_BLAIS
// Vector copy for a fast::count<N> tag: delegates to fast::copy, which is
// given the count tag in place of an end iterator.
// (presumably N is the compile-time length of x -- confirm against dim_n)
template <class VecX, class VecY, int N>
inline void
__copy(const VecX& x, VecY y, fast::count<N>)
{
  fast::copy(x.begin(), fast::count<N>(), y.begin());
}  
#endif


// Dense-to-dense vector copy.  Asserts that y is at least as long as x,
// then selects a __copy overload from dim_n<VecX>::RET().
template <class VecX, class VecY>
inline void
oned_copy(const VecX& x, VecY y, dense_tag, dense_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(x.size() <= y.size(), "vecvec::copy()");
  __copy(x, y, dim_n<VecX>::RET());
}  

/* scatter: each stored element of the sparse x is written to the
   position of the dense y given by the element's index */
template <class VecX, class VecY>
inline void
oned_copy(const VecX& x, VecY y, sparse_tag, dense_tag) MTL_THROW_ASSERTION
{
  typename VecX::const_iterator src;
  src = x.begin();
  while (src != x.end()) {
    y[src.index()] = *src;
    ++src;
  }
}

/* perform a gather: every element already stored in the sparse y is
   overwritten with the element of the dense x at the same index */
template <class VecX, class VecY>
inline void
oned_copy(const VecX& x, VecY y, dense_tag, sparse_tag) MTL_THROW_ASSERTION
{
  typename VecY::iterator yi;
  for (yi = y.begin(); yi != y.end(); ++yi)
    *yi = x[yi.index()];
}

// Sparse-to-sparse vector copy.  Asserts that y is at least as long as x,
// then hands the stored elements of x to mtl_algo::copy, writing from
// y.begin().
// NOTE(review): only values reach y this way; whether indices are carried
// over depends on the iterator types -- confirm.
template <class VecX, class VecY>
inline void
oned_copy(const VecX& x, VecY y, sparse_tag, sparse_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(x.size() <= y.size(), "vecvec::copy()");
  mtl_algo::copy(x.begin(), x.end(), y.begin());
}  


// Vector branch of copy(): selects the oned_copy overload from the
// sparsity tags of the source x and the destination y.
template <class VecX, class VecY>
inline void
__copy(const VecX& x, VecY y, oned_tag) MTL_THROW_ASSERTION
{
  typedef typename linalg_traits<VecX>::sparsity SpX;
  typedef typename linalg_traits<VecY>::sparsity SpY;
  oned_copy(x, y, SpX(), SpY());
}  


// Element-wise matrix copy for any shape without a dedicated overload:
// each element reached through A's iterators is assigned to the same
// (row, column) position of B.
template <class MatA, class MatB, class Shape>
inline void
twod_copy(const MatA& A, MatB B, Shape) MTL_THROW_ASSERTION
{
  typename MatA::const_iterator slice;
  typename MatA::OneD::const_iterator elt, last;

  slice = A.begin();
  while (slice != A.end()) {
    elt = (*slice).begin();
    last = (*slice).end();
    while (elt != last) {
      B(elt.row(),elt.column()) = *elt;
      ++elt;
    }
    ++slice;
  }
}

// Matrix copy from a symmetric A: each element reached through A's
// iterators is assigned to B at (row, column) and at the mirrored
// position (column, row).
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, symmetric_tag) MTL_THROW_ASSERTION
{
  typename MatA::const_iterator slice;
  typename MatA::OneD::const_iterator elt, last;

  slice = A.begin();
  while (slice != A.end()) {
    elt = (*slice).begin();
    last = (*slice).end();
    while (elt != last) {
      B(elt.row(),elt.column()) = *elt;
      B(elt.column(),elt.row()) = *elt;
      ++elt;
    }
    ++slice;
  }
}

// Matrix copy from a triangular A.  When A.is_unit() the diagonal of B is
// set to one first; the elements reachable through A's iterators are then
// copied by the rectangle_tag overload.
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, triangle_tag) MTL_THROW_ASSERTION
{
  if (A.is_unit()) {
    set_diagonal(B, typename matrix_traits<MatB>::value_type(1));
  }

  twod_copy(A, B, rectangle_tag());
}

// Copy into a dense destination: selects the twod_copy overload that
// matches the shape tag of the source A.
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, dense_tag)
{
  typedef typename matrix_traits<MatA>::shape Shape;
  twod_copy(A, B, Shape());
}


/*
  Sparse matrices have specialized copy functions since
  they need to optimize the creation of the non-zero structure.

  only good for same orientation!!!
 */


// Both matrices row-oriented: delegate to B.fast_copy(A).
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, row_tag, row_tag)
{
  B.fast_copy(A);
}
// Both matrices column-oriented: delegate to B.fast_copy(A).
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, column_tag, column_tag)
{
  B.fast_copy(A);
}

// Any other pair of orientation tags (i.e. not row/row or column/column):
// fall back to the dense_tag overload of twod_copy.
template <class MatA, class MatB, class OrienA, class OrienB>
inline void
twod_copy(const MatA& A, MatB B, OrienA, OrienB)
{
  twod_copy(A, B, dense_tag());
}

// Copy into a sparse destination: selects a twod_copy overload from the
// orientation tags of the source A and the destination B.
template <class MatA, class MatB>
inline void
twod_copy(const MatA& A, MatB B, sparse_tag)
{
  typedef typename matrix_traits<MatA>::orientation OrienA;
  typedef typename matrix_traits<MatB>::orientation OrienB;
  twod_copy(A, B, OrienA(), OrienB());
}

// Matrix branch of copy().  Asserts that B has at least as many rows and
// columns as A, then selects a twod_copy overload from the sparsity tag
// of the destination B.
template <class MatA, class MatB>
inline void
__copy(const MatA& A, MatB B, twod_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= B.nrows(), "copy(A, B, twod_tag)");
  MTL_ASSERT(A.ncols() <= B.ncols(), "copy(A, B, twod_tag)");

  typedef typename matrix_traits<MatB>::sparsity Sparsity;
  twod_copy(A, B, Sparsity());
}

//: Copy:  <tt>B <- A or y <- x</tt>
//
//  Copy the elements of matrix A into matrix B, or copy the elements
//  of vector x into vector y. For shaped and sparse matrices, this
//  copies only the elements stored in A to B.  If x is a sparse
//  vector and y is dense, a "scatter" is performed. If y is sparse
//  and x is dense, then a "gather" is performed. If both vectors
//  are sparse, but of different structure the result is undefined.
// 
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(m*n) for matrices. O(nnz) if either A or B are sparse and of the same orientation (otherwise it can be O(nnz^2)). O(n) for vectors.
//!example: vecvec_copy.cc
template <class LinalgA, class LinalgB>
inline void
copy(const LinalgA& A, LinalgB B) MTL_THROW_ASSERTION
{
  /* the dimension tag of the source picks the vector or matrix __copy */
  typedef typename linalg_traits<LinalgA>::dimension Dim;
  __copy(A, B, Dim());
}

// In-place vector add for the fast::count<0> tag: y is both the second
// input and the output of the transform, combined with x by std::plus.
template <class VecX, class VecY> inline
void
__add(const VecX& x, VecY y, fast::count<0>)
{
  typedef typename VecX::value_type T;
  mtl_algo::transform(x.begin(), x.end(), y.begin(), 
		      y.begin(), std::plus<T>());
}
#if USE_BLAIS
// In-place vector add for a fast::count<N> tag: same as the count<0>
// overload but through fast::transform, which is given the count tag in
// place of an end iterator.
template <class VecX, class VecY, int N> inline
void
__add(const VecX& x, VecY y, fast::count<N>)
{
  typedef typename VecX::value_type T;
  fast::transform(x.begin(), fast::count<N>(), y.begin(), 
		  y.begin(), std::plus<T>());
}
#endif
// Vector branch of the two-argument add().  Asserts that y is at least as
// long as x, then selects an __add overload from dim_n<VecX>::RET().
template <class VecX, class VecY> inline
void
__add(const VecX& x, VecY y, oned_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(x.size() <= y.size(), "vecvec::add()");

  __add(x, y, dim_n<VecX>::RET());
}


// Three-operand vector add for the fast::count<0> tag: x and y are
// combined with std::plus over [x.begin(), x.end()), writing from z.begin().
template <class VecX, class VecY, class VecZ> inline
void
oned_add(const VecX& x, const VecY& y, VecZ z, fast::count<0>)
{
  typedef typename VecX::value_type T;
  mtl_algo::transform(x.begin(), x.end(), y.begin(), z.begin(), std::plus<T>());
}
#if USE_BLAIS
// Three-operand vector add for a fast::count<N> tag: same as the count<0>
// overload but through fast::transform, which is given the count tag in
// place of an end iterator.
template <class VecX, class VecY, class VecZ, int N> inline
void
oned_add(const VecX& x, const VecY& y, VecZ z, fast::count<N>)
{
  typedef typename VecX::value_type T;
  fast::transform(x.begin(), fast::count<N>(), y.begin(), z.begin(), std::plus<T>());
}
#endif

// Sparse vector add: merges the stored elements of x and y in order of
// increasing index and appends the result to z with push_back.  Where both
// vectors hold an element at the same index the two values are summed;
// otherwise the single value is carried over unchanged.
template <class VecX, class VecY, class VecZ>
inline void
oned_add(const VecX& x, const VecY& y, VecZ z, sparse_tag)
{
  typedef typename VecZ::iterator ziter;
  typedef typename VecX::const_iterator xiter;
  typedef typename VecY::const_iterator yiter;

  xiter xcur = x.begin();
  xiter xstop = x.end();
  yiter ycur = y.begin();
  yiter ystop = y.end();

  /* both inputs still have elements: emit the one with the smaller index */
  while (xcur != xstop && ycur != ystop) {
    if (ycur.index() < xcur.index()) {
      z.push_back(ycur.index(), *ycur);
      ++ycur;
    } else if (xcur.index() < ycur.index()) {
      z.push_back(xcur.index(), *xcur);
      ++xcur;
    } else {
      /* same index in both: store the sum once */
      z.push_back(xcur.index(), *ycur + *xcur);
      ++xcur; ++ycur;
    }
  }

  /* at most one of the two tails below is non-empty */
  for (; xcur != xstop; ++xcur)
    z.push_back(xcur.index(), *xcur);
  for (; ycur != ystop; ++ycur)
    z.push_back(ycur.index(), *ycur);
}

// Dense destination: selects the fast::count overload of oned_add from
// dim_n<VecX>::RET().
template <class VecX, class VecY, class VecZ>
inline void
oned_add(const VecX& x, const VecY& y, VecZ z, dense_tag) MTL_THROW_ASSERTION
{
  oned_add(x, y, z, dim_n<VecX>::RET());
}

//: Add:  <tt>z <- x + y</tt>
//
// Add the elements of x and y and assign into z.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: y_ax_y.cc, vecvec_add.cc
//!typereqs: <tt>VecX::value_type</tt>,  <tt>VecY::value_type</tt>,  and  <tt>VecZ::value_type</tt> should be the same type
//!typereqs: The addition operator must be defined for the value_type.
//!complexity: linear time
template <class VecX, class VecY, class VecZ>
inline void
add(const VecX& x, const VecY& y, VecZ z) MTL_THROW_ASSERTION
{
  /* y and z must each be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::add()");
  MTL_ASSERT(x.size() <= z.size(), "vecvec::add()");

  /* the sparsity tag of the destination z picks the implementation */
  oned_add(x, y, z, typename linalg_traits<VecZ>::sparsity());
}

//: Add:  <tt>w <- x + y + z</tt>
//
// Add the elements of x, y, and z and assign into w.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vecvec_add3.cc
//!typereqs: <tt>VecX::value_type</tt>, <tt>VecY::value_type</tt>, <tt>VecZ::value_type</tt>, and <tt>VecW::value_type</tt> should be the same type
//!typereqs: The addition operator must be defined for the value_type.
//!complexity: linear time
template <class VecW, class VecX, class VecY, class VecZ>
inline void
add(const VecX& x, const VecY& y, const VecZ& z, VecW w) MTL_THROW_ASSERTION
{
  /* y, z and w must each be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::add()");
  MTL_ASSERT(x.size() <= z.size(), "vecvec::add()");
  MTL_ASSERT(x.size() <= w.size(), "vecvec::add()");

  typename VecX::const_iterator xp = x.begin();
  typename VecY::const_iterator yp = y.begin();
  typename VecZ::const_iterator zp = z.begin();
  typename VecW::iterator wp = w.begin();

  /* the four iterators advance in lock step; x determines the length */
  for (; not_at(xp, x.end()); ++xp) {
    *wp = *xp + *yp + *zp;
    ++yp;
    ++zp;
    ++wp;
  }
}

// Element-wise matrix add for any shape without a dedicated overload:
// each element reached through A's iterators is added to the same
// (row, column) position of B.
template <class MatA, class MatB, class Shape>
inline void
twod_add(const MatA& A, MatB B, Shape)
{
  typename MatA::const_iterator slice;
  typename MatA::OneD::const_iterator elt, last;

  slice = A.begin();
  while (slice != A.end()) {
    elt = (*slice).begin();
    last = (*slice).end();
    while (elt != last) {
      B(elt.row(), elt.column()) += *elt;
      ++elt;
    }
    ++slice;
  }
}

template <class MatA, class MatB>
inline void
twod_add(const MatA& A, MatB B, triangle_tag)
{
  typedef typename matrix_traits<MatA>::size_type Int;
  typedef typename matrix_traits<MatA>::value_type T;
  if (A.is_unit())
    for (Int i = 0; i < MTL_MIN(A.nrows(), A.ncols()); ++i)
      B(i,i) += T(1);

  twod_add(A, B, banded_tag());
}

/* perhaps I should add is_row() and is_column()
 methods to the matrices
 */
// Adds a symmetric, row-oriented A into B.  Every element visited by the
// inner for-loop is added at (row, column) and at the mirrored
// (column, row).  The remaining element of each row (the first one when
// A.is_upper(), the last one when A.is_lower()) is treated as the diagonal
// and added once.
template <class MatA, class MatB>
inline void
twod_symmetric_add(const MatA& A, MatB B, row_tag)
{
  typename MatA::const_iterator i;  
  typename MatA::Row::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_upper()) { /* handle the diagonal elements */
      B(j.column(), j.row()) += *j;
      ++j;
    } else
      --jend; /* leave the last element for the is_lower() step below */
    for (; j != jend; ++j) {
      B(j.row(), j.column()) += *j;
      B(j.column(), j.row()) += *j;
    }
    /* j == jend here; for a lower A that is the last stored element */
    if (A.is_lower())
      B(j.column(), j.row()) += *j;
  }
}

// Adds a symmetric, column-oriented A into B.  Every element visited by
// the inner for-loop is added at (row, column) and at the mirrored
// (column, row).  The remaining element of each column (the first one when
// A.is_lower(), the last one when A.is_upper()) is treated as the diagonal
// and added once.
template <class MatA, class MatB>
inline void
twod_symmetric_add(const MatA& A, MatB B, column_tag)
{
  typename MatA::const_iterator i;  
  typename MatA::Column::const_iterator j, jend;

  for (i = A.begin(); i != A.end(); ++i) {
    j = (*i).begin();
    jend = (*i).end();
    if (A.is_lower()) { /* handle the diagonal elements */
      B(j.column(), j.row()) += *j;
      ++j;
    } else
      --jend; /* leave the last element for the is_upper() step below */
    for (; j != jend; ++j) {
      B(j.row(), j.column()) += *j;
      B(j.column(), j.row()) += *j;
    }
    /* j == jend here; for an upper A that is the last stored element */
    if (A.is_upper())
      B(j.column(), j.row()) += *j;
  }
}


// Matrix add from a symmetric A: selects the row- or column-oriented
// twod_symmetric_add from the orientation tag of A.
template <class MatA, class MatB>
inline void
twod_add(const MatA& A, MatB B, symmetric_tag)
{
  typedef typename matrix_traits<MatA>::orientation Orien;
  twod_symmetric_add(A, B, Orien());
}


// Matrix branch of the two-argument add().  Asserts that B has at least as
// many rows and columns as A, then selects a twod_add overload from the
// shape tag of the source A.
template <class MatA, class MatB>
inline void
__add(const MatA& A, MatB B, twod_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() <= B.nrows(), "matmat::add()");
  MTL_ASSERT(A.ncols() <= B.ncols(), "matmat::add()");

  typedef typename matrix_traits<MatA>::shape Shape;
  twod_add(A, B, Shape());
}

//: Add:  <tt>B <- A + B  or  y <- x + y</tt>
//  The function adds the element of A to B, or the elements of x to y.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(m*n) for a dense A, O(nnz) for a sparse A. O(n) for a vector.

template <class LinalgA, class LinalgB>
inline void
add(const LinalgA& A, LinalgB B) MTL_THROW_ASSERTION
{

  /* the dimension tag of the source picks the vector or matrix __add */
  typedef typename linalg_traits<LinalgA>::dimension Dim;
  __add(A, B, Dim());
}



// Element-wise vector product for the fast::count<0> tag: x and y are
// combined with std::multiplies over [x.begin(), x.end()), writing from
// z.begin().
template <class VecX, class VecY, class VecZ>
inline void
ele_mult(const VecX& x, const VecY& y, VecZ z, fast::count<0>)
{
  typedef typename VecX::value_type T;
  /* qualified like the other functors in this file, so the call does not
     depend on a using-directive for namespace std */
  mtl_algo::transform(x.begin(), x.end(), y.begin(), z.begin(),
                      std::multiplies<T>());
}
#if USE_BLAIS
// Element-wise vector product for a fast::count<N> tag: same as the
// count<0> overload but through fast::transform, which is given the count
// tag in place of an end iterator.
template <class VecX, class VecY, class VecZ, int N>
inline void
ele_mult(const VecX& x, const VecY& y, VecZ z, fast::count<N>)
{
  typedef typename VecX::value_type T;
  /* qualified like the other functors in this file, so the call does not
     depend on a using-directive for namespace std */
  fast::transform(x.begin(), fast::count<N>(), y.begin(), z.begin(),
                  std::multiplies<T>());
}
#endif

//: Element-wise Multiplication:  <tt>z <- x O* y</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vecvec_ele_mult.cc
template <class VecX, class VecY, class VecZ>
inline void
ele_mult(const VecX& x, const VecY& y, VecZ z) MTL_THROW_ASSERTION
{
  /* y and z must each be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::ele_mult()");
  MTL_ASSERT(x.size() <= z.size(), "vecvec::ele_mult()");

  /* dim_n<VecX>::RET() selects the fast::count overload */
  ele_mult(x, y, z, dim_n<VecX>::RET());
}



//: Element-wise Multiply:  <tt>B <- A O* B</tt>
//
//  This function multiplies each of the elements
//  of B by the corresponding element of A.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n^2)
template <class MatA, class MatB>
inline void
ele_mult(const MatA& A, MatB B) MTL_THROW_ASSERTION
{
  /* Note: have to iterate over B, since
   * elements of B may get zeroed out,
   * but zero elements of B stay zero
   */
  /* B.begin() yields MatB::iterator and its slices yield
   * MatB::OneD::iterator, whatever the orientation of B
   */
  typename MatB::iterator i;
  typename MatB::OneD::iterator j, jend;

  for (i = B.begin(); i != B.end(); ++i) {
    j = (*i).begin(); jend = (*i).end();
    /* scale each stored element of B by the element of A at the
     * same (row, column) position
     */
    for (; j != jend; ++j)
      *j *= A(j.row(),j.column());
  }
}


//: Element-wise Division:  <tt>z <- x O/ y</tt>
//!category: algorithms
//!component: function
//!definition: mtl.h
//!example: vecvec_ele_div.cc
template <class VecX, class VecY, class VecZ>
inline void
ele_div(const VecX& x, const VecY& y, VecZ z) MTL_THROW_ASSERTION
{
  /* y and z must each be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::ele_div()");
  MTL_ASSERT(x.size() <= z.size(), "vecvec::ele_div()");

  typedef typename VecX::value_type T;
  /* x is the dividend and y the divisor; the functor is qualified like
     the other functors in this file, so the call does not depend on a
     using-directive for namespace std */
  mtl_algo::transform(x.begin(), x.end(), y.begin(), z.begin(),
                      std::divides<T>());
}




// Vector swap for the fast::count<0> tag: hands [x.begin(), x.end()) and
// y.begin() to mtl_algo::swap_ranges.
template <class VecX, class VecY>
inline void
swap(VecX x, VecY y, fast::count<0>)
{
  mtl_algo::swap_ranges(x.begin(), x.end(), y.begin());
}  
#if USE_BLAIS
// Vector swap for a fast::count<N> tag: delegates to fast::swap_ranges,
// which is given the count tag in place of an end iterator.
template <class VecX, class VecY, int N>
inline void
swap(VecX x, VecY y, fast::count<N>)
{
  fast::swap_ranges(x.begin(), fast::count<N>(), y.begin());
}  
#endif

// Vector branch of swap().  Asserts that y is at least as long as x, then
// selects a fast::count overload from dim_n<VecX>::RET().
template <class VecX, class VecY>
inline void
swap(VecX x, VecY y, oned_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(x.size() <= y.size(), "vecvec::swap()");
  swap(x, y, dim_n<VecX>::RET());
}  



// Matrix branch of swap().  Asserts that A and B have the same number of
// rows and columns, then walks both matrices in lock step through their
// two-level iterators and exchanges the elements pairwise.  The traversal
// is driven by A; B is assumed to offer the same sequence of elements.
template <class MatA, class MatB>
inline void
swap(MatA A, MatB B, twod_tag) MTL_THROW_ASSERTION
{
  MTL_ASSERT(A.nrows() == B.nrows(), "matmat::swap()");
  MTL_ASSERT(A.ncols() == B.ncols(), "matmat::swap()");

  typename MatA::iterator A_i;
  typename MatA::OneD::iterator A_ij, A_ijend;
  typename MatB::iterator B_i;
  /* (*B_i).begin() is an iterator over B's own one-dimensional slices,
     so it is typed with OneD just like A's inner iterator */
  typename MatB::OneD::iterator B_ij;

  A_i = A.begin();  B_i = B.begin();
  while (A_i != A.end()) {
    A_ij = (*A_i).begin();  B_ij = (*B_i).begin();
    A_ijend = (*A_i).end();
    while (A_ij != A_ijend) {
      typename matrix_traits<MatA>::value_type tmp = *B_ij;
      *B_ij = *A_ij;
      *A_ij = tmp;
      ++A_ij; ++B_ij;
    }
    ++A_i; ++B_i;
  }
}


//: Swap:   <tt>B <-> A or y <-> x</tt>
//
// Exchanges the elements of the containers.
//  Not compatible with sparse matrices. For banded matrices
//  and other shaped matrices, A and B must be the same shape.
//  Also, the two matrices must be the same orientation.
//
//!category: algorithms
//!component: function
//!definition: mtl.h
//!complexity: O(n^2)
//!example: vecvec_swap.cc
template <class LinalgA, class LinalgB>
inline void
swap(LinalgA A, LinalgB B) MTL_THROW_ASSERTION
{
  /* the dimension tag of the first argument picks the vector or matrix swap */
  typedef typename linalg_traits<LinalgA>::dimension Dim;
  swap(A, B, Dim());
}


// Dot product for the fast::count<0> tag: returns the result of
// mtl_algo::inner_product over [x.begin(), x.end()) and y, with s as the
// initial value.
template <class VecX, class VecY, class T>
inline T
dot(const VecX& x, const VecY& y, T s, fast::count<0>)
{
  return mtl_algo::inner_product(x.begin(), x.end(), y.begin(), s);
}
#if USE_BLAIS
// Dot product for a fast::count<N> tag: delegates to fast::inner_product,
// which is given the count tag in place of an end iterator.
template <class VecX, class VecY, class T, int N>
inline T
dot(const VecX& x, const VecY& y, T s, fast::count<N>)
{
  return fast::inner_product(x.begin(), fast::count<N>(), y.begin(), s);
}
#endif

//: Dot Product:  <tt>s <- x . y + s</tt>
//  The type used for argument s determines the
//  type of the resulting product.
//!category: algorithms
//!component: function
//!definition: mtl.h
template <class VecX, class VecY, class T>
inline T
dot(const VecX& x, const VecY& y, T s) MTL_THROW_ASSERTION
{
  /* y must be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::dot()");
  /* dim_n<VecX>::RET() selects the fast::count overload */
  return dot(x, y, s, dim_n<VecX>::RET());
}


//: Dot Product:  <tt>s <- x . y</tt>
//  The type of the resulting product is <TT>VecX::value_type</TT>.
//!category: algorithms
//!component: function
//!example: vecvec_dot.cc, dot_prod.cc
//!definition: mtl.h
template <class VecX, class VecY>
inline typename VecX::value_type
dot(const VecX& x, const VecY& y) MTL_THROW_ASSERTION
{
  typedef typename VecX::value_type T;
  /* start the accumulation from T(0) */
  return mtl::dot(x, y, T(0));
}


// Function object returning std::conj of its argument, converted back to
// the argument's own type.  dot_conj applies it to the elements of y
// through trans_iter.
// The call operator is const so the functor can also be invoked through a
// const object or a const reference.
struct conj_func {
  template <class T>
  T operator()(const T& x) const { return std::conj(x); }
};

// Conjugated dot product for the fast::count<0> tag: like dot(), but y is
// read through trans_iter with conj_func, so each element of y passes
// through conj_func before it is multiplied.
template <class VecX, class VecY, class T>
inline T
dot_conj(const VecX& x, const VecY& y, T s, fast::count<0>)
{
  return mtl_algo::inner_product(x.begin(), x.end(),
				 trans_iter(y.begin(), conj_func()), s);
}
#if USE_BLAIS
template <class VecX, class VecY, class T, int N>
inline T
dot_conj(const VecX& x, const VecY& y, T s, fast::count<N>)
{
  return fast::inner_product(x.begin(), x.end(),
			     trans_iter(y.begin(), conj_func()), s);
}
#endif

//: Dot Conjugate:  <tt>s <- x . conj(y) + s</tt>
//   Similar to dot product. The complex conjugate of the elements of y
//   is used. For real numbers, the conjugate is just that real number.
//   Note that the type of parameter s is the return type of this
//   function.
//!category: algorithms
//!component: function
//!definition: mtl.h
template <class VecX, class VecY, class T>
inline T
dot_conj(const VecX& x, const VecY& y, T s) MTL_THROW_ASSERTION
{
  /* y must be at least as long as x */
  MTL_ASSERT(x.size() <= y.size(), "vecvec::dot_conj()");
  /* dim_n<VecX>::RET() selects the fast::count overload */
  return dot_conj(x, y, s, dim_n<VecX>::RET());
}

//: Dot Conjugate:   <tt>s <- x . conj(y)</tt>
//  A slightly simpler version of the dot conjugate.
//  The return type is the element type of vector x.
//!category: algorithms
//!component: function
//!definition: mtl.h
template <class VecX, class VecY>
inline typename VecX::value_type
dot_conj(const VecX& x, const VecY& y) MTL_THROW_ASSERTION
{
  typedef typename VecX::value_type T;
  /* start the accumulation from T(0) */
  return mtl::dot_conj(x, y, T(0));
}






} /* namespace mtl */

#endif /* _MTL_MTL_H_ */

