Scythe-1.0.3
la.h
Go to the documentation of this file.
00001 /* 
00002  * Scythe Statistical Library Copyright (C) 2000-2002 Andrew D. Martin
00003  * and Kevin M. Quinn; 2002-present Andrew D. Martin, Kevin M. Quinn,
00004  * and Daniel Pemstein.  All Rights Reserved.
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify under the terms of the GNU General Public License as
00008  * published by Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.  See the text files
00010  * COPYING and LICENSE, distributed with this source code, for further
00011  * information.
00012  * --------------------------------------------------------------------
00013  *  scythestat/la.h
00014  *
00015  */
00016 
00034 #ifndef SCYTHE_LA_H
00035 #define SCYTHE_LA_H
00036 
00037 #ifdef SCYTHE_COMPILE_DIRECT
00038 #include "matrix.h"
00039 #include "algorithm.h"
00040 #include "error.h"
00041 #ifdef SCYTHE_LAPACK
00042 #include "lapack.h"
00043 #endif
00044 #else
00045 #include "scythestat/matrix.h"
00046 #include "scythestat/algorithm.h"
00047 #include "scythestat/error.h"
00048 #ifdef SCYTHE_LAPACK
00049 #include "scythestat/lapack.h"
00050 #endif
00051 #endif
00052 
00053 #include <numeric>
00054 #include <algorithm>
00055 #include <set>
00056 
00057 namespace scythe {
00058 
00059   namespace {
00060     typedef unsigned int uint;
00061   }
00062 
00063   /* Matrix transposition */
00064 
00077   template <matrix_order RO, matrix_style RS, typename T,
00078             matrix_order PO, matrix_style PS>
00079   Matrix<T, RO, RS>
00080   t (const Matrix<T,PO,PS>& M)
00081   {
00082     uint rows = M.rows();
00083     uint cols = M.cols();
00084     Matrix<T,RO,Concrete> ret(cols, rows, false);
00085     if (PO == Col)
00086       copy<Col,Row>(M, ret);
00087     else
00088       copy<Row,Col>(M, ret);
00089 
00090     SCYTHE_VIEW_RETURN(T, RO, RS, ret)
00091   }
00092 
00093   template <typename T, matrix_order O, matrix_style S>
00094   Matrix<T, O, Concrete>
00095   t (const Matrix<T,O,S>& M)
00096   {
00097     return t<O,Concrete>(M);
00098   }
00099 
00100   /* Ones matrix generation */
00101   
00115   template <typename T, matrix_order O, matrix_style S>
00116   Matrix<T,O,S>
00117   ones (unsigned int rows, unsigned int cols)
00118   {
00119     return Matrix<T,O,S> (rows, cols, true, (T) 1);
00120   }
00121 
00122   template <typename T, matrix_order O>
00123   Matrix<T, O, Concrete>
00124   ones (unsigned int rows, unsigned int cols)
00125   {
00126     return ones<T, O, Concrete>(rows, cols);
00127   }
00128 
00129   template <typename T>
00130   Matrix<T, Col, Concrete>
00131   ones (unsigned int rows, unsigned int cols)
00132   {
00133     return ones<T, Col, Concrete>(rows, cols);
00134   }
00135 
00136   inline Matrix<double, Col, Concrete>
00137   ones (unsigned int rows, unsigned int cols)
00138   {
00139     return ones<double, Col, Concrete>(rows, cols);
00140   }
00141 
00142   /* Identity  Matrix generation */
00143 
00144   // This functor contains the working parts of the eye algorithm.
00145   namespace {
00146     template <class T> struct eye_alg {
00147         T operator() (uint i, uint j) {
00148           if (i == j)
00149             return (T) 1.0;
00150           return (T) 0.0;
00151         }
00152     };
00153   }
00154   
00173   template <typename T, matrix_order O, matrix_style S>
00174   Matrix<T,O,S>
00175   eye (unsigned int k)
00176   {
00177     Matrix<T,O,Concrete> ret(k, k, false);
00178     for_each_ij_set(ret, eye_alg<T>());
00179     SCYTHE_VIEW_RETURN(T, O, S, ret)
00180   }
00181 
00182   template <typename T, matrix_order O>
00183   Matrix<T, O, Concrete>
00184   eye (uint k)
00185   {
00186     return eye<T, O, Concrete>(k);
00187   }
00188 
00189   template <typename T>
00190   Matrix<T, Col, Concrete>
00191   eye (uint k)
00192   {
00193     return eye<T, Col, Concrete>(k);
00194   }
00195 
00196   inline Matrix<double, Col, Concrete>
00197   eye (uint k)
00198   {
00199     return eye<double, Col, Concrete>(k);
00200   }
00201 
00202   /* Create a k x 1 vector-additive sequence matrix */
00203 
00204   // The seqa algorithm
00205   namespace {
00206     template <typename T> struct seqa_alg {
00207       T cur_; T inc_;
00208       seqa_alg(T start, T inc) : cur_ (start), inc_ (inc)  {}
00209       T operator() () { T ret = cur_; cur_ += inc_; return ret; }
00210     };
00211   }
00212   
00235   template <typename T, matrix_order O, matrix_style S>
00236   Matrix<T,O,S>
00237   seqa (T start, T incr, uint rows)
00238   {
00239     Matrix<T,O,Concrete> ret(rows, 1, false);
00240     generate(ret.begin_f(), ret.end_f(), seqa_alg<T>(start, incr));
00241     SCYTHE_VIEW_RETURN(T, O, S, ret)
00242   }
00243 
00244   template <typename T, matrix_order O>
00245   Matrix<T, O, Concrete>
00246   seqa (T start, T incr, uint rows)
00247   {
00248     return seqa<T, O, Concrete>(start, incr, rows);
00249   }
00250 
00251   template <typename T>
00252   Matrix<T, Col, Concrete>
00253   seqa (T start, T incr, uint rows)
00254   {
00255     return seqa<T, Col, Concrete>(start, incr, rows);
00256   }
00257 
00258   inline Matrix<double, Col, Concrete>
00259   seqa (double start, double incr, uint rows)
00260   {
00261     return seqa<double, Col, Concrete>(start, incr, rows);
00262   }
00263 
00264   /* Uses the STL sort to sort a Matrix in ascending row-major order */
00265   
00279   template <matrix_order SORT_ORDER,
00280             matrix_order RO, matrix_style RS, typename T,
00281             matrix_order PO, matrix_style PS>
00282   Matrix<T, RO, RS>
00283   sort (const Matrix<T, PO, PS>& M)
00284   {
00285     Matrix<T,RO,Concrete> ret = M;
00286 
00287     std::sort(ret.template begin<SORT_ORDER>(), 
00288               ret.template end<SORT_ORDER>());
00289 
00290     SCYTHE_VIEW_RETURN(T, RO, RS, ret)
00291   }
00292 
00293   template <matrix_order SORT_ORDER,
00294             typename T, matrix_order O, matrix_style S>
00295   Matrix<T, O, Concrete>
00296   sort (const Matrix<T,O,S>& M)
00297   {
00298     return sort<SORT_ORDER, O, Concrete>(M);
00299   }
00300 
00301   template <typename T, matrix_order O, matrix_style S>
00302   Matrix<T, O, Concrete>
00303   sort (const Matrix<T,O,S>& M)
00304   {
00305     return sort<O, O, Concrete>(M);
00306   }
00307 
00319   template <matrix_order RO, matrix_style RS, typename T,
00320             matrix_order PO, matrix_style PS>
00321   Matrix<T, RO, RS>
00322   sortc (const Matrix<T, PO, PS>& M)
00323   {
00324     Matrix<T,RO,Concrete> ret = M;
00325 
00326     // TODO need to figure out a way to do fully optimized
00327     // vector iteration
00328     for (uint col = 0; col < ret.cols(); ++col) {
00329       Matrix<T,PO,View> column = ret(_, col);
00330       std::sort(column.begin(), column.end());
00331     }
00332 
00333     SCYTHE_VIEW_RETURN(T, RO, RS, ret)
00334   }
00335 
00336   template <typename T, matrix_order O, matrix_style S>
00337   Matrix<T, O, Concrete>
00338   sortc(const Matrix<T,O,S>& M)
00339   {
00340     return sortc<O,Concrete>(M);
00341   }
00342 
00343   /* Column bind two matrices */
00344   
00360   template <matrix_order RO, matrix_style RS, typename T,
00361             matrix_order PO1, matrix_style PS1,
00362             matrix_order PO2, matrix_style PS2>
00363   Matrix<T,RO,RS>
00364   cbind (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B)
00365   {
00366     SCYTHE_CHECK_10(A.rows() != B.rows(), scythe_conformation_error,
00367         "Matrices have different numbers of rows");
00368 
00369     Matrix<T,RO,Concrete> ret(A.rows(), A.cols() + B.cols(), false);
00370     std::copy(B.template begin_f<Col>(), B.template end_f<Col>(),
00371               std::copy(A.template begin_f<Col>(), 
00372                         A.template end_f<Col>(), 
00373                         ret.template begin_f<Col>()));
00374     SCYTHE_VIEW_RETURN(T, RO, RS, ret)
00375   }
00376 
00377 
00378   template <typename T, matrix_order PO1, matrix_style PS1,
00379             matrix_order PO2, matrix_style PS2>
00380   Matrix<T,PO1,Concrete>
00381   cbind (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B)
00382   {
00383     return cbind<PO1,Concrete>(A, B);
00384   }
00385 
00386   /* Row bind two matrices */
00387   
00403   template <matrix_order RO, matrix_style RS, typename T,
00404             matrix_order PO1, matrix_style PS1,
00405             matrix_order PO2, matrix_style PS2>
00406   Matrix<T,RO,RS>
00407   rbind (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B)
00408   {
00409     SCYTHE_CHECK_10(A.cols() != B.cols(), scythe_conformation_error,
00410         "Matrices have different numbers of columns");
00411 
00412     Matrix<T,RO,Concrete> ret(A.rows() + B.rows(), A.cols(), false);
00413     std::copy(B.template begin_f<Row>(), B.template end_f<Row>(),
00414               std::copy(A.template begin_f<Row>(), 
00415                         A.template end_f<Row>(), 
00416                         ret.template begin_f<Row>()));
00417     SCYTHE_VIEW_RETURN(T, RO, RS, ret)
00418   }
00419 
00420   template <typename T, matrix_order PO1, matrix_style PS1,
00421             matrix_order PO2, matrix_style PS2>
00422   Matrix<T,PO1,Concrete>
00423   rbind (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B)
00424   {
00425     return rbind<PO1,Concrete>(A, B);
00426   }
00427 
00428   /* Calculates the order of each element in a Matrix */
00429 
00430   // Functor encapsulating the meat of the algorithm
00431   namespace {
00432     template <class T,matrix_order O,matrix_style S> struct order_alg {
00433       Matrix<T,O> M_;
00434       order_alg (const Matrix<T,O,S>& M) : M_ (M) {}
00435       uint operator() (T x) {
00436         Matrix<bool,O> diff = (M_ < x);
00437         return std::accumulate(diff.begin_f(), diff.end_f(), (uint) 0);
00438       }
00439     };
00440   }
00441     
00455   /* NOTE This function used to only work on column vectors.  I see no
00456    * reason to maintain this restriction.
00457    */
00458   template <matrix_order RO, matrix_style RS, typename T,
00459             matrix_order PO, matrix_style PS>
00460   Matrix<unsigned int, RO, RS>
00461   order (const Matrix<T, PO, PS>& M)
00462   {
00463     Matrix<uint, RO, Concrete> ranks(M.rows(), M.cols(), false);
00464     std::transform(M.begin_f(), M.end_f(), ranks.template begin_f<PO>(),
00465                    order_alg<T, PO, PS>(M));
00466     SCYTHE_VIEW_RETURN(uint, RO, RS, ranks)
00467   }
00468 
00469   template <typename T, matrix_order O, matrix_style S>
00470   Matrix<unsigned int, O, Concrete>
00471   order (const Matrix<T,O,S>& M)
00472   {
00473     return order<O,Concrete>(M);
00474   }
00475   
00476   /* Selects all the rows of Matrix A for which binary column vector e
00477    * has an element equal to 1
00478    */
00479    
00496   template <matrix_order RO, matrix_style RS, typename T,
00497             matrix_order PO1, matrix_style PS1,
00498             matrix_order PO2, matrix_style PS2>
00499   Matrix<T,RO,RS>
00500   selif (const Matrix<T,PO1,PS1>& M, const Matrix<bool,PO2,PS2>& e)
00501   {
00502     SCYTHE_CHECK_10(M.rows() != e.rows(), scythe_conformation_error,
00503      "Data matrix and selection vector have different number of rows");
00504     SCYTHE_CHECK_10(! e.isColVector(), scythe_dimension_error,
00505         "Selection matrix is not a column vector");
00506 
00507     uint N = std::accumulate(e.begin_f(), e.end_f(), (uint) 0);
00508     Matrix<T,RO,Concrete> res(N, M.cols(), false);
00509     int cnt = 0;
00510     for (uint i = 0; i < e.size(); ++i) {
00511       if (e[i]) {
00512         Matrix<T,RO,View> Mvec = M(i, _);
00513         // TODO again, need optimized vector iteration
00514         std::copy(Mvec.begin_f(), Mvec.end_f(), 
00515             res(cnt++, _).begin_f());
00516       }
00517     }
00518 
00519     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00520   }
00521  
00522   template <typename T, matrix_order PO1, matrix_style PS1,
00523             matrix_order PO2, matrix_style PS2>
00524   Matrix<T,PO1,Concrete>
00525   selif (const Matrix<T,PO1,PS1>& M, const Matrix<bool,PO2,PS2>& e)
00526   {
00527     return selif<PO1,Concrete>(M, e);
00528   }
00529 
00530   /* Find unique elements in a matrix and return a sorted row vector */
00544   template <matrix_order RO, matrix_style RS, typename T,
00545             matrix_order PO, matrix_style PS>
00546   Matrix<T, RO, RS>
00547   unique (const Matrix<T, PO, PS>& M)
00548   {
00549     std::set<T> u(M.begin_f(), M.end_f());
00550     Matrix<T,RO,Concrete> res(1, u.size(), false);
00551     std::copy(u.begin(), u.end(), res.begin_f());
00552 
00553     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00554   }
00555 
00556   template <typename T, matrix_order O, matrix_style S>
00557   Matrix<T, O, Concrete>
00558   unique (const Matrix<T,O,S>& M)
00559   {
00560     return unique<O,Concrete>(M);
00561   }
00562 
00563   /* NOTE I killed reshape.  It seems redundant with resize. DBP */
00564 
00565 
00566   /* Make vector out of unique elements of a symmetric Matrix.  
00567    */
00568    
00588   template <matrix_order RO, matrix_style RS, typename T,
00589             matrix_order PO, matrix_style PS>
00590   Matrix<T, RO, RS>
00591   vech (const Matrix<T, PO, PS>& M)
00592   {
00593     SCYTHE_CHECK_30(! M.isSymmetric(), scythe_dimension_error,
00594         "Matrix not symmetric");
00595 
00596     Matrix<T,RO,Concrete> 
00597       res((uint) (0.5 * (M.size() - M.rows())) + M.rows(), 1, false);
00598     typename Matrix<T,RO,Concrete>::forward_iterator it = res.begin_f();
00599 
00600     /* We want to traverse M in storage order if possible so we take
00601      * the upper triangle of row-order matrices and the lower triangle
00602      * of column-order matrices.
00603      */
00604     if (M.storeorder() == Col) {
00605       for (uint i = 0; i < M.rows(); ++i) {
00606         Matrix<T,PO,View> strip = M(i, i, M.rows() - 1, i);
00607         it = std::copy(strip.begin_f(), strip.end_f(), it);
00608       }
00609     } else {
00610       for (uint j = 0; j < M.cols(); ++j) {
00611         Matrix<T,PO,View> strip = M(j, j, j, M.cols() - 1);
00612         it = std::copy(strip.begin_f(), strip.end_f(), it);
00613       }
00614     }
00615 
00616     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00617   }
00618 
00619   template <typename T, matrix_order O, matrix_style S>
00620   Matrix<T, O, Concrete>
00621   vech (const Matrix<T,O,S>& M)
00622   {
00623     return vech<O,Concrete>(M);
00624   }
00625 
00638   template <matrix_order RO, matrix_style RS, typename T,
00639             matrix_order PO, matrix_style PS>
00640   Matrix<T, RO, RS>
00641   xpnd (const Matrix<T, PO, PS>& v)
00642   {
00643     double size_d = -.5 + .5 * std::sqrt(1. + 8 * v.size());
00644     SCYTHE_CHECK_10(std::fmod(size_d, 1.) != 0., 
00645         scythe_dimension_error, 
00646         "Input vector can't generate square matrix");
00647 
00648     uint size = (uint) size_d;
00649     Matrix<T,RO,Concrete> res(size, size, false);
00650 
00651     /* It doesn't matter if we travel in order here.
00652      * TODO Might want to use iterators.
00653      */
00654     uint cnt = 0;
00655     for (uint i = 0; i < size; ++i)
00656       for (uint j = i; j < size; ++j)
00657         res(i, j) = res(j, i) = v[cnt++];
00658 
00659     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00660   }
00661 
00662   template <typename T, matrix_order O, matrix_style S>
00663   Matrix<T, O, Concrete>
00664   xpnd (const Matrix<T,O,S>& v)
00665   {
00666     return xpnd<O,Concrete>(v);
00667   }
00668 
00669   /* Get the diagonal of a Matrix. */
00670   
00682   template <matrix_order RO, matrix_style RS, typename T,
00683             matrix_order PO, matrix_style PS>
00684   Matrix<T, RO, RS>
00685   diag (const Matrix<T, PO, PS>& M)
00686   {
00687     Matrix<T,RO,Concrete> res(std::min(M.rows(), M.cols()), 1, false);
00688     
00689     /* We want to use iterators to maximize speed for both concretes
00690      * and views, but we always want to tranvers M in order to avoid
00691      * slowing down concretes.
00692      */
00693     uint incr = 1;
00694     if (PO == Col)
00695       incr += M.rows();
00696     else
00697       incr += M.cols();
00698 
00699     typename Matrix<T,PO,PS>::const_iterator pit;
00700     typename Matrix<T,RO,Concrete>::forward_iterator rit 
00701       = res.begin_f();
00702     for (pit = M.begin(); pit < M.end(); pit += incr)
00703       *rit++ = *pit;
00704 
00705     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00706   }
00707 
00708   template <typename T, matrix_order O, matrix_style S>
00709   Matrix<T, O, Concrete>
00710   diag (const Matrix<T,O,S>& M)
00711   {
00712     return diag<O,Concrete>(M);
00713   }
00714 
00715   /* Fast calculation of A*B+C. */
00716 
00717   namespace {
00718     // Algorithm when one matrix is 1x1
00719     template <matrix_order RO, typename T,
00720               matrix_order PO1, matrix_style PS1,
00721               matrix_order PO2, matrix_style PS2>
00722     void
00723     gaxpy_alg(Matrix<T,RO,Concrete>& res, const Matrix<T,PO1,PS1>& X,
00724               const Matrix<T,PO2,PS2>& B, T constant)
00725     {
00726       res = Matrix<T,RO,Concrete>(X.rows(), X.cols(), false);
00727       if (maj_col<RO,PO1,PO2>())
00728         std::transform(X.template begin_f<Col>(), 
00729                        X.template end_f<Col>(),
00730                        B.template begin_f<Col>(),
00731                        res.template begin_f<Col>(),
00732                        ax_plus_b<T>(constant));
00733       else
00734         std::transform(X.template begin_f<Row>(), 
00735                        X.template end_f<Row>(),
00736                        B.template begin_f<Row>(),
00737                        res.template begin_f<Row>(),
00738                        ax_plus_b<T>(constant));
00739     }
00740   }
00741 
00768   template <matrix_order RO, matrix_style RS, typename T,
00769             matrix_order PO1, matrix_style PS1,
00770             matrix_order PO2, matrix_style PS2,
00771             matrix_order PO3, matrix_style PS3>
00772   Matrix<T,RO,RS>
00773   gaxpy (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B,
00774          const Matrix<T,PO3,PS3>& C)
00775   {
00776     
00777     Matrix<T, RO, Concrete> res;
00778 
00779     if (A.isScalar() && B.rows() == C.rows() && B.cols() == C.cols()) {
00780       // Case 1: 1x1 * nXk + nXk
00781       gaxpy_alg(res, B, C, A[0]);
00782     } else if (B.isScalar() && A.rows() == C.rows() &&
00783                A.cols() == C.cols()) {
00784       // Case 2: m x n  *  1 x 1  +  m x n
00785       gaxpy_alg(res, A, C, B[0]);
00786     } else if (A.cols() == B.rows() && A.rows() == C.rows() &&
00787                B.cols() == C.cols()) {
00788       // Case 3: m x n  *  n x k  +  m x k
00789 
00790       res = Matrix<T,RO,Concrete> (A.rows(), B.cols(), false);
00791 
00792       /* These are identical to matrix mult, one optimized for
00793        * row-major and one for col-major.
00794        */
00795       
00796       T tmp;
00797       if (RO == Col) { // col-major optimized
00798        for (uint j = 0; j < B.cols(); ++j) {
00799          for (uint i = 0; i < A.rows(); ++i)
00800           res(i, j) = C(i, j);
00801          for (uint l = 0; l < A.cols(); ++l) {
00802            tmp = B(l, j);
00803            for (uint i = 0; i < A.rows(); ++i)
00804              res(i, j) += tmp * A(i, l);
00805          }
00806        }
00807       } else { // row-major optimized
00808        for (uint i = 0; i < A.rows(); ++i) {
00809          for (uint j = 0; j < B.cols(); ++j)
00810            res(i, j) = C(i, j);
00811          for (uint l = 0; l < B.rows(); ++l) {
00812            tmp = A(i, l);
00813            for (uint j = 0; j < B.cols(); ++j)
00814              res(i, j) += tmp * B(l,j);
00815          }
00816        }
00817       }
00818 
00819     } else {
00820       SCYTHE_THROW(scythe_conformation_error,
00821           "Expects (m x n  *  1 x 1  +  m x n)"
00822           << "or (1 x 1  *  n x k  +  n x k)"
00823           << "or (m x n  *  n x k  +  m x k)");
00824     }
00825 
00826     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00827   }
00828 
00829   template <typename T, matrix_order PO1, matrix_style PS1,
00830             matrix_order PO2, matrix_style PS2,
00831             matrix_order PO3, matrix_style PS3>
00832   Matrix<T,PO1,Concrete>
00833   gaxpy (const Matrix<T,PO1,PS1>& A, const Matrix<T,PO2,PS2>& B,
00834          const Matrix<T,PO3,PS3>& C)
00835   {
00836     return gaxpy<PO1,Concrete>(A,B,C);
00837   }
00838 
00854   template <matrix_order RO, matrix_style RS, typename T,
00855             matrix_order PO, matrix_style PS>
00856   Matrix<T, RO, RS>
00857   crossprod (const Matrix<T, PO, PS>& A)
00858   {
00859     /* When rows > 1, we provide differing implementations of the
00860      * algorithm depending on A's ordering to maximize strided access.
00861      *
00862      * The non-vector version of the algorithm fills in a triangle and
00863      * then copies it over.
00864      */
00865     Matrix<T, RO, Concrete> res;
00866     T tmp;
00867     if (A.rows() == 1) {
00868       res = Matrix<T,RO,Concrete>(A.cols(), A.cols(), true);
00869       for (uint k = 0; k < A.rows(); ++k) {
00870         for (uint i = 0; i < A.cols(); ++i) {
00871           tmp = A(k, i);
00872           for (uint j = i; j < A.cols(); ++j) {
00873             res(j, i) =
00874               res(i, j) += tmp * A(k, j);
00875           }
00876         }
00877       }
00878     } else {
00879       if (PO == Row) { // row-major optimized
00880         /* TODO: This is a little slower than the col-major.  Improve.
00881          */
00882         res = Matrix<T,RO,Concrete>(A.cols(), A.cols(), true);
00883         for (uint k = 0; k < A.rows(); ++k) {
00884           for (uint i = 0; i < A.cols(); ++i) {
00885             tmp = A(k, i);
00886             for (uint j = i; j < A.cols(); ++j) {
00887                 res(i, j) += tmp * A(k, j);
00888             }
00889           }
00890         }
00891         for (uint i = 0; i < A.cols(); ++i)
00892           for (uint j = i + 1; j < A.cols(); ++j)
00893             res(j, i) = res(i, j);
00894       } else { // col-major optimized
00895         res = Matrix<T,RO,Concrete>(A.cols(), A.cols(), false);
00896         for (uint j = 0; j < A.cols(); ++j) {
00897           for (uint i = j; i < A.cols(); ++i) {
00898             tmp = (T) 0;
00899             for (uint k = 0; k < A.rows(); ++k)
00900               tmp += A(k, i) * A(k, j);
00901             res(i, j) = tmp;
00902           }
00903         }
00904         for (uint i = 0; i < A.cols(); ++i)
00905           for (uint j = i + 1; j < A.cols(); ++j)
00906             res(i, j) = res(j, i);
00907       }
00908     }
00909 
00910     SCYTHE_VIEW_RETURN(T, RO, RS, res)
00911   }
00912 
00913   template <typename T, matrix_order O, matrix_style S>
00914   Matrix<T, O, Concrete>
00915   crossprod (const Matrix<T,O,S>& M)
00916   {
00917     return crossprod<O,Concrete>(M);
00918   }
00919 
00920 #ifdef SCYTHE_LAPACK
00921   /* Template specializations of for col-major, concrete
00922    * matrices of doubles that are only available when a lapack library
00923    * is available.
00924    */
00925 
00926   template<>
00927   inline Matrix<>
00928   gaxpy<Col,Concrete,double,Col,Concrete,Col,Concrete,Col,Concrete>
00929   (const Matrix<>& A, const Matrix<>& B, const Matrix<>& C)
00930   {
00931     SCYTHE_DEBUG_MSG("Using lapack/blas for gaxpy");
00932     Matrix<> res;
00933 
00934     if (A.isScalar() && B.rows() == C.rows() && B.cols() == C.cols()) {
00935       // Case 1: 1x1 * nXk + nXk
00936       gaxpy_alg(res, B, C, A[0]);
00937     } else if (B.isScalar() && A.rows() == C.rows() &&
00938                A.cols() == C.cols()) {
00939       // Case 2: m x n  *  1 x 1  +  m x n
00940       gaxpy_alg(res, A, C, B[0]);
00941     } else if (A.cols() == B.rows() && A.rows() == C.rows() &&
00942                B.cols() == C.cols()) {
00943       res = C; // NOTE: this copy may eat up speed gains, but can't be
00944                //       avoided.
00945       
00946       // Case 3: m x n  *  n x k  +  m x k
00947       double* Apnt = A.getArray();
00948       double* Bpnt = B.getArray();
00949       double* respnt = res.getArray();
00950       const double one(1.0);
00951       int rows = (int) res.rows();
00952       int cols = (int) res.cols();
00953       int innerDim = A.cols();
00954 
00955       lapack::dgemm_("N", "N", &rows, &cols, &innerDim, &one, Apnt,
00956                      &rows, Bpnt, &innerDim, &one, respnt, &rows);
00957 
00958     }
00959       return res;
00960   }
00961 
00962   template<>
00963   inline Matrix<>
00964   crossprod(const Matrix<>& A)
00965   {
00966     SCYTHE_DEBUG_MSG("Using lapack/blas for crossprod");
00967     // Set up some constants
00968     const double zero = 0.0;
00969     const double one = 1.0;
00970 
00971     // Set up return value and arrays
00972     Matrix<> res(A.cols(), A.cols(), false);
00973     double* Apnt = A.getArray();
00974     double* respnt = res.getArray();
00975     int rows = (int) A.rows();
00976     int cols = (int) A.cols();
00977 
00978     lapack::dsyrk_("L", "T", &cols, &rows, &one, Apnt, &rows, &zero, respnt,
00979                    &cols);
00980     lapack::make_symmetric(respnt, cols);
00981 
00982     return res;
00983   }
00984 
00985 #endif
00986 
00987 
00988 } // end namespace scythe
00989 
00990 
00991 #endif /* SCYTHE_LA_H */