[jblas] 10/24: Imported Upstream version 1.1

Sun Jan 5 05:45:58 UTC 2014

This is an automated email from the git hooks/post-receive script.

tmancill pushed a commit to branch master
in repository jblas.

commit 09d574ff76493b6f73d6f75e44ae1178225239c2
Author: tony mancill <tmancill at debian.org>
Date:   Tue Dec 31 15:19:20 2013 -0800

    Imported Upstream version 1.1
---
 BUILDING_ATLAS                                     |  90 +++++--
 Makefile                                           |  32 ++-
 README                                             |   7 +-
 RELEASE_NOTES                                      |  14 +
 build.xml                                          |  54 ++--
 config/config_cc.rb                                |   2 +-
 config/config_fortran.rb                           |   4 +
 config/config_lapack_sources.rb                    |   9 +-
 config/config_libs.rb                              |  27 +-
 examples/complex_svd.rb                            |  34 +++
 fortranwrapper.dump                                | Bin 49328 -> 21305 bytes
 nbproject/project.xml                              |   4 +-
 scripts/fortran/java.rb                            | 298 ++++++++++++---------
 scripts/fortranwrapper.rb                          |   4 +
 scripts/java-impl.c                                |  38 ++-
 scripts/rjpp.rb                                    |   6 +-
 src/org/jblas/ComplexDoubleMatrix.java             |  16 +-
 src/org/jblas/ComplexFloatMatrix.java              |  16 +-
 src/org/jblas/ConvertsToDoubleMatrix.java          |   5 +-
 src/org/jblas/Decompose.java                       |   2 +
 src/org/jblas/DoubleMatrix.java                    |  34 ++-
 src/org/jblas/Eigen.java                           |   2 +
 src/org/jblas/FloatMatrix.java                     |  34 ++-
 src/org/jblas/NativeBlas.java                      | 228 +++++++---------
 src/org/jblas/Singular.java                        | 193 +++++++++++++
 src/org/jblas/Solve.java                           |  12 +-
 src/org/jblas/benchmark/BenchmarkResult.java       |   4 +-
 .../JavaDoubleMultiplicationBenchmark.java         |   2 +-
 .../JavaFloatMultiplicationBenchmark.java          |   2 +-
 src/org/jblas/benchmark/Main.java                  |  18 +-
 src/org/jblas/util/LibraryLoader.java              | 153 ++++++-----
 src/org/jblas/util/SanityChecks.java               |  45 +++-
 test/org/jblas/TestBlasDoubleComplex.java          |  51 ++--
 test/org/jblas/TestSolve.java                      |   9 +-
 34 files changed, 967 insertions(+), 482 deletions(-)

diff --git a/BUILDING_ATLAS b/BUILDING_ATLAS
index 44726d6..ecd1070 100644
--- a/BUILDING_ATLAS
+++ b/BUILDING_ATLAS
@@ -1,5 +1,5 @@
 BUILDING_ATLAS
---------------
+==============
 
 In this file, I summarize the different configuration options for the
 platforms.  I keep this information mainly for my own reference, but
@@ -14,8 +14,16 @@ work pretty well for both Intel and AMD chips. On 64bit, AMD64K10h is
 a bit better, but not much. On the other hand, the AMD settings
 perform too well on the Intel platform.
 
-OBTAINING THE SOFTWARE
-----------------------
+TABLE OF CONTENTS:
+
+1. OBTAINING THE SOFTWARE
+2. GENERAL REMARKS
+3. LINUX
+4. MAC OS X
+5. WINDOWS
+
+1. OBTAINING THE SOFTWARE
+=========================
 
 Getting lapack:
 
@@ -29,6 +37,35 @@ Getting jblas:
 
 git clone git://github.com/mikiobraun/jblas.git
 
+2. GENERAL REMARKS
+==================
+
+The general steps are as follows:
+
+* Compile lapack. Lapack uses blas for its low-level computations. ATLAS also
+  implements a subset of Lapack, but not all routines. But you can build ATLAS
+  to include the Lapack routines it does not implement, and this is what we're
+  going to do.
+
+* Compile ATLAS.
+
+* Compile the native part of jblas with "make".
+
+* Compile the java part of jblas with "ant".
+
+The ATLAS build process is a bit, well, unusual, coming with compiled (not scripted)
+configuration scripts. ATLAS has prebuilt configurations which result in much
+faster compilation times. If it doesn't find such a profile for your computer,
+it will perform a large number of timing experiments to find the best combination
+(hence the name "Automatically Tuned...").
+
+Finally, some older processors don't support the SSE3 instruction set. Some tweaking
+is necessary to compile ATLAS for SSE2, namely configuring as for SSE3, and
+then going into the file "Make.inc", and replacing SSE3 by SSE2 or removing -DATL_SSE3
+from the compiler directives.
+
+For reference, here is a list of the main ATLAS options.
+
 ATLAS OPTIONS
 -------------
 
@@ -52,14 +89,14 @@ ATLAS Architecture Options: (-A)
    12 = 'CoreSolo'
    13 = 'CoreDuo'
    14 = 'Core2Solo'
-   15 = 'Core2' <-
+   15 = 'Core2' <- used for 32 bit builds
    16 = 'Corei7'
    17 = 'P4'
    18 = 'P4E'
    19 = 'Efficeon'
    20 = 'K7'
-   21 = 'HAMMER' <- for "normal AMD"
-   22 = 'AMD64K10h' <- for "64bit AMD"
+   21 = 'HAMMER'
+   22 = 'AMD64K10h' <- used for 64 bit builds
    23 = 'UNKNOWNx86'
    24 = 'IA64Itan'
    25 = 'IA64Itan2'
@@ -86,13 +123,13 @@ ATLAS Operating System Options: (-O)
    10 = 'FreeBSD'
    11 = 'OSX'
 
-Linux
-=====
+3. Linux
+========
 
 LAPACK
 ------
 
-Copy make.inc.LINUX from INSTALL, update options like this:
+Copy make.inc.LINUX from INSTALL directory, update options in the file like this:
 
 FORTRAN  = gfortran 
 OPTS     = -O2 -fPIC
@@ -127,7 +164,7 @@ For 64 bit:
 as above, but configure with
 
 > ../configure \
-  --with-netlib-lapack=/home/mikio/build/lapack-lite-3.1.1/lapack_LINUX.a \
+  --with-netlib-lapack=$HOME/build/lapack-lite-3.1.1/lapack_LINUX.a \
   -A 21 -b 64 -Si cputhrchk 0 -Fa alg -fPIC  
 
 
@@ -137,8 +174,8 @@ jblas
 > ./configure --static-libs --libpath=$HOME/build/ATLAS/build/lib/
 
 
-Mac OS X
-========
+4. Mac OS X
+===========
 
 If you install the gfortran with mac ports, you get gcc-mp-4.3 and
 gfortran-mp-4.3. This means you have to redefine all the
@@ -178,8 +215,8 @@ It's important to set the 32bit flag. Otherwise, it seems to construct
 64 bit code.
 
 
-Windows XP (32bit)
-==================
+5. Windows XP (32bit)
+=====================
 
 I'm describing here using cygwin. For some reason, the gcc4 version in cygwin
 does not support compiling to mingw, but using the cygwin version leads
@@ -188,11 +225,28 @@ leading to a crash as soon as the cygwin compiled code is loaded.
 
 What all of this means is that you should use the old gcc3 "legacy" compilers.
 
-Back in ATLAS 3.6, one needed to set architectures explicitly. With 3.8.3, there is no
-need to do so, but you have to set the fortran compiler explicitly to "g77" (and
-the bit numbers to 32):
+Also watch out for directory names with spaces in them, ATLAS does not like
+them.
 
-../configure --with-netlib-lapack=$HOME/mikio/build/lapack-lite-3.1.1/liblapack_fortran.a -A 15 -C if g77 -b 32
+LAPACK
+------
 
+Compiling LAPACK just works as in Linux, but replace "gfortran" by "g77".
 
+ATLAS
+-----
+
+Back in ATLAS 3.6, one needed to set architectures explicitly. With
+3.8.3, there is no need to do so, but you have to set the fortran
+compiler explicitly to "g77" (and the bit numbers to 32):
+
+../configure --with-netlib-lapack=$HOME//build/lapack-lite-3.1.1/liblapack_fortran.a -A 15 -C if g77 -b 32
+
+64-bit
+------
 
+Unfortunately, there is no support for 64bit under Windows right now,
+mainly because there is no 64bit mingw compiler in cygwin. There
+exists the mingw-w64 project
+(http://sourceforge.net/projects/mingw-w64/), but I didn't manage to
+compile ATLAS using those.
diff --git a/Makefile b/Makefile
index 8f0be2d..ba3a80e 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ## --- END LICENSE BLOCK ---
 
-VERSION=1.0.2
+VERSION=1.1
 
 ######################################################################
 #
@@ -91,17 +91,17 @@ generate-wrapper: $(GENERATED_SOURCES) $(GENERATED_HEADERS)
 
 # Clean all object files
 clean:
-	rm -f native/*.o native/*.$(SO) $(LIB_PATH)/*.$(SO) $(FULL_LIB_PATH)/*.$(SO) src/$(PACKAGE_PATH)/NativeBlas.java
+	rm -f native/*.o native/*.$(SO) $(LIB_PATH)/*.$(SO) $(FULL_LIB_PATH)/*.$(SO) src/$(PACKAGE_PATH)/NativeBlas.java generated-sources
 
 # Full clean, including information extracted from the fortranwrappers.
 # You will need the original fortran sources in order to rebuild
 # the wrappers.
 ifeq ($(LAPACK_HOME),)
-realclean:
+realclean: clean
 	@echo "Since you don't have LAPACK sources, I cannot rebuild stubs and deleting the cached information is not a good idea."
 	@echo "(nothing deleted)"
 else
-realclean:
+realclean: clean
 	rm -f fortranwrapper.dump
 endif
 
@@ -110,9 +110,20 @@ endif
 generated-sources: \
   scripts/fortranwrapper.rb scripts/fortran/types.rb \
   scripts/fortran/java.rb scripts/java-class.java scripts/java-impl.c \
-  src/org/jblas/util/ArchFlavor.java src/org/jblas/NativeBlas.java 
-	$(RUBY) scripts/fortranwrapper.rb $(PACKAGE) NativeBlas \
-	$(BLAS)/*.f \
+  src/org/jblas/util/ArchFlavor.java #src/org/jblas/NativeBlas.java 
+	$(RUBY) scripts/fortranwrapper.rb --complexcc $(CCC) $(PACKAGE) NativeBlas \
+	$(BLAS)/[sdcz]copy.f \
+	$(BLAS)/[sdcz]swap.f \
+	$(BLAS)/[sdcz]axpy.f \
+	$(BLAS)/[sdcz]scal.f \
+  $(BLAS)/[cz][sd]scal.f \
+	$(BLAS)/[sdcz]dot*.f \
+	$(BLAS)/[sd]*nrm2.f \
+	$(BLAS)/[sd]*asum.f \
+	$(BLAS)/i[sdcz]amax.f \
+	$(BLAS)/[sdcz]gemv.f \
+	$(BLAS)/[sdcz]ger*.f \
+	$(BLAS)/[sdcz]gemm.f \
 	$(LAPACK)/[sd]gesv.f \
 	$(LAPACK)/[sd]sysv.f \
 	$(LAPACK)/[sd]syev.f \
@@ -120,11 +131,14 @@ generated-sources: \
 	$(LAPACK)/[sd]posv.f \
 	$(LAPACK)/[sdcz]geev.f \
 	$(LAPACK)/[sd]getrf.f \
-	$(LAPACK)/[sd]potrf.f 
+	$(LAPACK)/[sd]potrf.f \
+	$(LAPACK)/[sdcz]gesvd.f
 	ant javah
 	touch $@
 
-native/NativeBlas.o: generated-sources
+native/NativeBlas.c: generated-sources
+
+native/NativeBlas.o: native/NativeBlas.c
 	$(CC) $(CFLAGS) $(INCDIRS) -c native/NativeBlas.c -o $@
 
 native/jblas_arch_flavor.o: generated-sources
diff --git a/README b/README
index 21a4d3f..739184f 100644
--- a/README
+++ b/README
@@ -1,6 +1,7 @@
 jblas is a matrix library for Java which uses existing high
 performance BLAS and LAPACK libraries like ATLAS.
 
+Version 1.1, August 16, 2010
 Version 1.0.2, February 26, 2010
 Version 1.0.1, January 14, 2010
 Version 1.0, December 22, 2009
@@ -14,8 +15,8 @@ Homepage: http://jblas.org
 
 INSTALL
 
-In principle, all you need is the jblas-1.0.2,jar in your
-classpath. jblas-1.0.2.jar will then automagically extract your platform
+In principle, all you need is the jblas-1.1.jar in your
+classpath. jblas-1.1.jar will then automagically extract your platform
 dependent native library to a tempfile and load it from there. You can
 also put that file somewhere in your load path ($LD_LIBRARY_PATH for
 Linux, %PATH for Windows).
@@ -46,7 +47,7 @@ HOW TO GET STARTED
 Have a look at javadoc/index.html and javadoc/org/jblas/DoubleMatrix.html
 
 If you want to validate your installation and get some performance
-numbers, try "java -server -jar jblas-1.0.2.jar", or drop the server
+numbers, try "java -server -jar jblas-1.1.jar", or drop the server
 in case, you only have the "client" JVM installed.
 
 
diff --git a/RELEASE_NOTES b/RELEASE_NOTES
index 8d37d7c..9b5276d 100644
--- a/RELEASE_NOTES
+++ b/RELEASE_NOTES
@@ -1,3 +1,17 @@
+Release 1.1 - August 16, 2010
+
+Added Singular Value Decomposition, some bugfixes
+
+- fixed bug with complex return values and gfortran vs. g77
+- added --ptatlas flag for compiling against multithreaded ATLAS
+- dropped support for 32bit Mac OS X
+
+The main reason for dropping support for 32bit Mac OS X was that
+the latest version installs as 64bit by default, and the install
+for the 32bit macports gcc seems to be broken -> no way to compile it.
+
+----------------------------------------------------------------------
+
 Release 1.0.2 - February 26, 2010
 
 Mostly a bug-fix release for the build-process
diff --git a/build.xml b/build.xml
index 1206eca..02b2417 100644
--- a/build.xml
+++ b/build.xml
@@ -35,13 +35,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 -->
 <project name="java-blas" basedir="." default="jar">
     <description>
-		This is the build script to compile and deploy the java-blas project.
-		It also generates JavaDoc from source in ../doc.
+        This is the build script to compile and deploy the java-blas project.
+        It also generates JavaDoc from source in ../doc.
     </description>
 
     <!-- Define directories -->
 
-    <property name="version" value="1.0.2" />
+    <property name="version" value="1.1" />
     <property name="src" value="${basedir}/src" />
     <property name="test" value="${basedir}/test" />
     <property name="bin" value="${basedir}/bin" />
@@ -91,7 +91,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                     <include name="org/**" />
                     <include name="lib/**/lib*.so"/>
                     <include name="lib/**/*.dll"/>
-		    <include name="lib/**/*.jnilib"/>
+                    <include name="lib/**/*.jnilib"/>
                 </fileset>
                 <manifest>
                     <attribute name="Built-By" value="IDA Group, TU Berlin" />
@@ -126,7 +126,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         <javac destdir="${bin}" encoding="utf-8" source="1.5" debug="on" compiler="javac1.5" target="1.5" fork="yes" nowarn="yes">
             <src path="${test}" />
             <classpath>
-                <pathelement location="${external}/junit.jar" />
+                <pathelement location="${external}/junit-4.8.2.jar" />
             </classpath>
         </javac>
     </target>
@@ -162,6 +162,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         <static-class-to-float class="${pkgbase}.Geometry"/>
         <static-class-to-float class="${pkgbase}.MatrixFunctions"/>
         <static-class-to-float class="${pkgbase}.JavaBlas"/>
+        <static-class-to-float class="${pkgbase}.Singular"/>
+        <!--<static-class-to-float class="${pkgbase}.Decompose"/>-->
     </target>
     <target name="preprocess" description="run the ruby preprocessor on necessary files">
         <rjpp file="src/org/jblas/DoubleMatrix.java"/>
@@ -177,19 +179,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         <copy todir="${bin}/lib">
             <fileset dir="${native-libs}">
                 <include name="dynamic/${os.name}/${os.arch}/*" />
+                <include name="dynamic/${os.name}/${os.arch}/**/*" />
             </fileset>
         </copy>
-        <create-jar filename="jblas-lean-${os.name}-${os.arch}-${version}.jar" />
+        <create-jar filename="jblas-dynamic-${os.name}-${os.arch}-${version}.jar" />
     </target>
 
-    <target name="static-lean-jar" depends="compile" description="create jblas.jar (local system, dynamic)">
+    <target name="static-lean-jar" depends="compile" description="create jblas.jar (local system, static)">
         <delete dir="${bin}/lib" />
         <copy todir="${bin}/lib">
             <fileset dir="${native-libs}">
                 <include name="static/${os.name}/${os.arch}/*" />
+                <include name="static/${os.name}/${os.arch}/**/*" />
             </fileset>
         </copy>
-        <create-jar filename="jblas-lean-${os.name}-${os.arch}-${version}.jar" />
+        <create-jar filename="jblas-static-${os.name}-${os.arch}-${version}.jar" />
     </target>
 
     <target name="jar" depends="compile" description="create multiplatform.jar (everything in native-libs/static)">
@@ -201,7 +205,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
         </copy>
         <create-jar filename="jblas-${version}.jar" />
     </target>
-    
+
     <target name="minimal-jar" depends="compile" description="create jblas-lean.jar (local system, dynamic, without shared libs)" >
         <delete dir="${bin}/lib" />
         <create-jar filename="jblas-minimal-${version}.jar" />
@@ -211,16 +215,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
     <target name="javadoc" depends="generate-float">
         <mkdir dir="${doc}" />
-        <javadoc packagenames="org.jblas.*" encoding="utf-8"
-		sourcepath="${src}" defaultexcludes="yes"
-		destdir="${doc}" author="true" version="true"
-		use="true" windowtitle="jblas" linksource="yes"
-			stylesheetfile="javadoc.css" overview="${src}/overview.html">
+        <javadoc packagenames="org.jblas*.*" encoding="utf-8"
+                 sourcepath="${src}" defaultexcludes="yes"
+                 destdir="${doc}" author="true" version="true"
+                 use="true" windowtitle="jblas" linksource="yes"
+                 stylesheetfile="javadoc.css" overview="${src}/overview.html">
             <doctitle>
-	<![CDATA[ <h1>jblas - Linear Algebra for Java (version ${version})</h1> ]]>
+                <![CDATA[ <h1>jblas - Linear Algebra for Java (version ${version})</h1> ]]>
             </doctitle>
             <bottom>
-	<![CDATA[ © 2008-2010 by Mikio L. Braun and contributors ]]>
+                <![CDATA[ © 2008-2010 by Mikio L. Braun and contributors ]]>
             </bottom>
         </javadoc>
     </target>
@@ -228,23 +232,29 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     <!-- testing -->
 
     <target name="test" depends="compile-test,jar">
-        <junit printsummary="yes" haltonfailure="yes" showoutput="yes">
+        <junit printsummary="yes" haltonfailure="yes" showoutput="yes" fork="yes">
             <classpath>
-                <!-- <pathelement location="${external}/junit.jar" /> -->
+                <pathelement location="${external}/junit-4.8.2.jar" />
                 <pathelement path="${bin}" />
             </classpath>
             <formatter type="plain" usefile="false" />
             <!-- <test name="${pkgbase}.TestDoubleMatrix" />
             <test name="${pkgbase}.TestEigen" />
-            <test name="${pkgbase}.ranges.IntervalRangeTest" /> -->
-            <test name="${pkgbase}.DecomposeTest" />
+            <test name="${pkgbase}.ranges.IntervalRangeTest" />
+            <test name="${pkgbase}.DecomposeTest" /> -->
+            <!-- <batchtest fork="yes" todir="${reports.tests}">
+                <fileset dir="${test}">
+                    <include name="**/*Test*.java"/>
+                </fileset>
+            </batchtest>-->
+            <test name="${pkgbase}.TestBlasDoubleComplex" />
         </junit>
     </target>
     <target name="all" depends="jar, javadoc">
     </target>
 
     <!-- taring everything up for distribution -->
-    
+
     <target name="tar" depends="javadoc">
         <tar destfile="jblas-${version}.tgz" compression="gzip">
             <tarfileset dir="${basedir}" prefix="jblas-${version}">
@@ -270,7 +280,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                 <include name="native/jblas_arch_flavor.c" />
             </tarfileset>
             <tarfileset dir="${basedir}" prefix="jblas-${version}" mode="755"
-		    includes="configure" />
+                        includes="configure" />
         </tar>
     </target>
 </project>
diff --git a/config/config_cc.rb b/config/config_cc.rb
index eb232e2..7343bad 100644
--- a/config/config_cc.rb
+++ b/config/config_cc.rb
@@ -45,7 +45,7 @@ configure :cc => 'CC'
 desc 'Setting up gcc and flags'
 configure 'CC', 'CFLAGS' => ['OS_NAME', 'OS_ARCH', 'JAVA_HOME'] do
   os_name = Config::CONFIG['OS_NAME']
-  java_home = Config::CONFIG['JAVA_HOME'].escape
+  java_home = Config::CONFIG['JAVA_HOME']
   case os_name
   when 'Linux'
     Path.check_cmd('gcc', 'make', 'ld')
diff --git a/config/config_fortran.rb b/config/config_fortran.rb
index a2f324a..d3c1df0 100644
--- a/config/config_fortran.rb
+++ b/config/config_fortran.rb
@@ -52,6 +52,7 @@ configure 'F77', 'LD' => [:os_arch, :cc] do
   if CONFIG['OS_NAME'] == 'Mac\ OS\ X'
     CONFIG['LD'] = CONFIG['CC']
     CONFIG['F77'] = 'gfortran-mp-4.3'
+    CONFIG['CCC'] = 'c99'
   else
     g77 = Path.where('g77')
     gfortran = Path.where('gfortran')
@@ -59,12 +60,15 @@ configure 'F77', 'LD' => [:os_arch, :cc] do
     if g77
       CONFIG['LD'] = 'g77'
       CONFIG['F77'] = 'g77'
+      CONFIG['CCC'] = 'f2c'
     elsif gfortran
       CONFIG['F77'] = 'gfortran'
       CONFIG['LD'] = CONFIG['CC']
+      CONFIG['CCC'] = 'c99'
     elsif f77
       CONFIG['F77'] = 'f77'
       CONFIG['LD'] = 'f77'
+      CONFIG['CCC'] = 'f2c'
     else
       CONFIG.fail <<EOS.indent(2)
 Either g77 or gfortran have to be installed to compile against the
diff --git a/config/config_lapack_sources.rb b/config/config_lapack_sources.rb
index 1bb67d9..37757d0 100644
--- a/config/config_lapack_sources.rb
+++ b/config/config_lapack_sources.rb
@@ -40,14 +40,17 @@ require 'config/string_ext'
 include Config
 
 configure :lapack_sources => 'LAPACK_HOME'
+def check_lapack_home(lapack_home)
+  Path.check_files(lapack_home,
+        ['BLAS', 'SRC', 'dgemm.f'],
+        ['SRC', 'dsyev.f'])
+end
 
 desc 'search for lapack sources (configure by --lapack=dir)'
 configure 'LAPACK_HOME' do
   lapack_home = $opts.get :lapack, './lapack-lite-3.1.1'
   begin
-    Path.check_files(lapack_home,
-      ['BLAS', 'SRC', 'dgemm.f'],
-      ['SRC', 'dsyev.f'])
+    check_lapack_home(lapack_home)
   rescue ConfigError => e
     if $opts.defined? :download_lapack
       puts "trying to download lapack (about 5M)"
diff --git a/config/config_libs.rb b/config/config_libs.rb
index e9948a1..0ca80c4 100644
--- a/config/config_libs.rb
+++ b/config/config_libs.rb
@@ -67,7 +67,8 @@ ATLAS_REQUIRED_SYMBOLS = [
 
 LAPACK_REQUIRED_SYMBOLS = [ 'dsyev_', 'daxpy_' ]
 
-ATLAS_LIBS = %w(lapack lapack_fortran lapack_atlas f77blas atlas cblas)
+ATLAS_LIBS = %w(lapack lapack_fortran lapack_atlas f77blas cblas atlas)
+PT_ATLAS_LIBS = %w(lapack lapack_fortran lapack_atlas ptf77blas ptcblas atlas)
 LAPACK_LIBS = %w(lapack_fortran lapack blas_fortran blas)
 
 configure :libs => 'LOADLIBES'
@@ -110,11 +111,15 @@ configure 'BUILD_TYPE' do
 end
 
 desc 'looking for libraries...'
-configure 'LOADLIBES' => ['LINKAGE_TYPE', :libpath, 'F77', 'BUILD_TYPE'] do
+configure 'LOADLIBES' => ['LINKAGE_TYPE', :libpath, 'F77', 'BUILD_TYPE', 'OS_ARCH'] do
 
   case CONFIG['BUILD_TYPE']
   when 'atlas'
-    libs = ATLAS_LIBS
+    if $opts.defined? :ptatlas
+      libs = PT_ATLAS_LIBS
+    else
+      libs = ATLAS_LIBS
+    end
     syms = ATLAS_REQUIRED_SYMBOLS
   when 'lapack'
     libs = LAPACK_LIBS
@@ -129,7 +134,7 @@ configure 'LOADLIBES' => ['LINKAGE_TYPE', :libpath, 'F77', 'BUILD_TYPE'] do
     CONFIG['LDFLAGS'] += result.values.uniq.map {|s| '-L' + s.escape}
     CONFIG['LOADLIBES'] += result.keys.map {|s| '-l' + s.escape}
   when 'static'
-    #CONFIG['LOADLIBES'] += ['-Wl,--allow-multiple-definition'] unless CONFIG['OS_NAME'] == 'Mac\ OS\ X'
+    CONFIG['LOADLIBES'] += ['-Wl,-z,muldefs'] unless CONFIG['OS_NAME'] == 'Mac\ OS\ X' or CONFIG['OS_NAME'] == 'Windows'
 
     # Add the libraries with their full path to the command line.
     # We have to sort them in the order as they appear in +libs+, otherwise
@@ -138,7 +143,17 @@ configure 'LOADLIBES' => ['LINKAGE_TYPE', :libpath, 'F77', 'BUILD_TYPE'] do
       sort {|x, y| libs.index(x) <=> libs.index(y)}.
       map {|s| File.join(result[s], LibHelpers.libname(s)).escape }
     if CONFIG['F77'] == 'gfortran'
-      CONFIG['LOADLIBES'] += ['-l:libgfortran.a']
+      puts CONFIG['OS_ARCH']
+      if CONFIG['OS_NAME'] == 'Linux' and CONFIG['OS_ARCH'] == 'amd64'
+	CONFIG['LOADLIBES'] += ['-lgfortran']
+        puts <<EOS
+WARNING: on 64bit Linux, I cannot link the gfortran library into the shared library
+because it's usually not compiled with -fPIC. This means that you need to
+have libgfortran.so installed on your target system. Sorry for the inconvenience!
+EOS
+      else
+        CONFIG['LOADLIBES'] += ['-l:libgfortran.a']
+      end
     end
     if CONFIG['OS_NAME'] == 'Mac\ OS\ X'
       CONFIG['LOADLIBES'] += ['/opt/local/lib/gcc43/libgfortran.a']
@@ -151,4 +166,4 @@ if __FILE__ == $0
   $opts = Opts.new(ARGV)
   Config.run :libs
   Config::CONFIG.dump($stdout)
-end
\ No newline at end of file
+end
diff --git a/examples/complex_svd.rb b/examples/complex_svd.rb
new file mode 100644
index 0000000..6ebf7fd
--- /dev/null
+++ b/examples/complex_svd.rb
@@ -0,0 +1,34 @@
+require 'java'
+require 'jblas-1.0.3.jar'
+
+java_import 'org.jblas.ComplexDoubleMatrix'
+java_import 'org.jblas.DoubleMatrix'
+java_import 'org.jblas.Singular'
+
+Matrix = ComplexDoubleMatrix
+
+M = 4
+N = 3
+
+m = Matrix.new(M, N)
+
+for i in 0...M
+  for j in 0...N
+    m.put(i, j, i+j, i*j)
+    #m.put(i, j, i+j)
+  end
+end
+
+puts m.dup
+
+u, s, v = Singular.sparseSVD(m) 
+
+s = Matrix.diag(s)
+
+puts "u = #{u}"
+puts "s = #{s}"
+puts "v = #{v}"
+
+puts u.mmul(s).mmul(v.transpose).sub(m).norm2()
+
+puts Singular.SVDValues(m)
diff --git a/fortranwrapper.dump b/fortranwrapper.dump
index 09c16f4..9a5b536 100644
Binary files a/fortranwrapper.dump and b/fortranwrapper.dump differ
diff --git a/nbproject/project.xml b/nbproject/project.xml
index 9e71fd8..7742e51 100644
--- a/nbproject/project.xml
+++ b/nbproject/project.xml
@@ -117,7 +117,7 @@
             </ide-actions>
             <export>
                 <type>jar</type>
-                <location>jblas-0.3.jar</location>
+                <location>jblas-1.0.2.jar</location>
                 <build-target>jar</build-target>
             </export>
             <view>
@@ -169,7 +169,7 @@
         <java-data xmlns="http://www.netbeans.org/ns/freeform-project-java/2">
             <compilation-unit>
                 <package-root>src</package-root>
-                <built-to>jblas-0.3.jar</built-to>
+                <built-to>jblas-1.0.2.jar</built-to>
                 <source-level>1.5</source-level>
             </compilation-unit>
             <compilation-unit>
diff --git a/scripts/fortran/java.rb b/scripts/fortran/java.rb
index 448a3cf..a6929ea 100644
--- a/scripts/fortran/java.rb
+++ b/scripts/fortran/java.rb
@@ -96,10 +96,10 @@ module Fortran
       end
       
       result = if array or comment =~ /output/
-                 ArrayTypeMap[basetype]
-               else
-                 StdTypeMap[basetype]
-               end
+        ArrayTypeMap[basetype]
+      else
+        StdTypeMap[basetype]
+      end
 
       if not result
         raise ArgumentError, "Don't know how to convert '#{to_s}'"
@@ -107,7 +107,7 @@ module Fortran
       
       return result
     end
-    
+
     def to_c
       javatype = to_java
       if javatype =~ /\[\]$/
@@ -195,7 +195,7 @@ module Fortran
         @arrays = [] # already seen arrays
       end
       
-  public
+      public
       # Generate the JNI-wrapper code for the fortran routine.
       # This is a bit more complex and calls other routines for the different
       # parts.
@@ -302,16 +302,16 @@ EOS
 
       def r; @routine; end
 
-  private ############################################################
+      private ############################################################
 
       # convenience accessor
 
       def java_return_type
-          if r.return_type.basetype == 'VOID' and r.args.last == 'INFO'
-            'int'
-          else
-            r.return_type.to_java
-          end        
+        if r.return_type.basetype == 'VOID' and r.args.last == 'INFO'
+          'int'
+        else
+          r.return_type.to_java
+        end
       end
 
       def make_fortran_fct_name name
@@ -370,7 +370,11 @@ EOS
         elsif javatype =~ /\[\]/
           Java::BufferArgument.new(self)
         elsif javatype =~ /Complex/
-          Java::ComplexArgument.new(self)
+          if $complexcc == 'f2c'
+            Java::ComplexF2CArgument.new(self)
+          else
+            Java::ComplexC99Argument.new(self)
+          end 
         elsif javatype == 'char'
           Java::CharArgument.new(self)
         else
@@ -384,11 +388,19 @@ EOS
       # Stuff for workspaces below
       #
 
+      def workspace_size_factor(t)
+        if t.basetype =~ /COMPLEX/
+          '*2'
+        else
+          ''
+        end
+      end
+
       # Declarations for workspace arrays
       def declare_workspace_arrays
         r.gen_each_arg do |n, t|
           if r.workspace_argument? n
-            "    #{t.to_java} #{n.downcase} = new #{t.to_java[0..-3]}[1];"
+            "    #{t.to_java} #{n.downcase} = new #{t.to_java[0..-3]}[1#{workspace_size_factor(t)}];"
           elsif r.workspace_size_argument? n
             "    #{t.to_java} #{n.downcase};"
           end
@@ -399,23 +411,23 @@ EOS
       def workspace_query
         return "#{r.name.downcase}(" +
           r.gen_each_arg(', ') do |n,t|
-            if r.workspace_size_argument? n
-              '-1'
-            elsif n != 'INFO'
-              if t.to_java =~ /\[\]\Z/
-                if r.workspace_argument? n
-                  "#{n.downcase}, 0"
-                else
-                  # replace array arguments by their type
-                  dummy_name = t.to_java.gsub /\[\]/, 'Dummy'
-                  #"#{t.to_java} #{n.downcase}, #{n.downcase}Idx"
-                  "#{dummy_name}, 0"
-                end
+          if r.workspace_size_argument? n
+            '-1'
+          elsif n != 'INFO'
+            if t.to_java =~ /\[\]\Z/
+              if r.workspace_argument? n
+                "#{n.downcase}, 0"
               else
-                n.downcase
+                # replace array arguments by their type
+                dummy_name = t.to_java.gsub /\[\]/, 'Dummy'
+                #"#{t.to_java} #{n.downcase}, #{n.downcase}Idx"
+                "#{dummy_name}, 0"
               end
+            else
+              n.downcase
             end
-          end + ")"
+          end
+        end + ")"
       end
 
       # allocate the actual workspaces
@@ -423,7 +435,8 @@ EOS
         r.gen_each_arg do |n, t|
           if r.workspace_argument? n
             n = n.downcase
-            "    l#{n} = (int) #{n}[0]; #{n} = new #{t.to_java[0..-3]}[l#{n}];"
+            factor = workspace_size_factor(t)
+            "    l#{n} = (int) #{n}[0]; #{n} = new #{t.to_java[0..-3]}[l#{n}#{factor}];"
           end
         end
       end
@@ -432,18 +445,18 @@ EOS
       def call_with_workspaces
         return "#{r.name.downcase}(" +
           r.gen_each_arg(', ') do |n,t|
-            if n != 'INFO'
-              if t.to_java =~ /\[\]\Z/
-                if r.workspace_argument? n
-                  "#{n.downcase}, 0"
-                else
-                  "#{n.downcase}, #{n.downcase}Idx"
-                end
+          if n != 'INFO'
+            if t.to_java =~ /\[\]\Z/
+              if r.workspace_argument? n
+                "#{n.downcase}, 0"
               else
-                n.downcase
+                "#{n.downcase}, #{n.downcase}Idx"
               end
+            else
+              n.downcase
             end
-          end + ")"
+          end
+        end + ")"
       end
     end
 
@@ -539,116 +552,161 @@ EOS
   if (#{name}) {
 EOS
         unless code.arrays.empty?
-          code.arrays.each do |a, t|
-            if t == basectype
-              code.conversions << "if((*env)->IsSameObject(env, #{name}, #{a}) == JNI_TRUE)\n      #{name}PtrBase = #{a}PtrBase;\n    else\n      "
-            end
+        code.arrays.each do |a, t|
+          if t == basectype
+            code.conversions << "if((*env)->IsSameObject(env, #{name}, #{a}) == JNI_TRUE)\n      #{name}PtrBase = #{a}PtrBase;\n    else\n      "
           end
         end
-        code.conversions << <<EOS
+      end
+      code.conversions << <<EOS
 #{name}PtrBase = (*env)->Get#{basectype[1..-1].capitalize}ArrayElements(env, #{name}, NULL);
     #{name}Ptr = #{name}PtrBase + #{'2*' if type.basetype =~ /COMPLEX/}#{name}Idx;
   }
 EOS
         
-        # and releasing the stuff again...
-        release = []
-        release << "  if(#{name}PtrBase) {"
-        release << "    (*env)->Release#{basectype[1..-1].capitalize}ArrayElements(env, #{name}, #{name}PtrBase, #{@type.output? ? '0' : 'JNI_ABORT'});"
-        code.arrays.each do |a, t|
-          if t == basectype
-            release << "    if (#{name}PtrBase == #{a}PtrBase)"
-            release << "      #{a}PtrBase = 0;"
-          end
+      # and releasing the stuff again...
+      release = []
+      release << "  if(#{name}PtrBase) {"
+      release << "    (*env)->Release#{basectype[1..-1].capitalize}ArrayElements(env, #{name}, #{name}PtrBase, #{@type.output? ? '0' : 'JNI_ABORT'});"
+      code.arrays.each do |a, t|
+        if t == basectype
+          release << "    if (#{name}PtrBase == #{a}PtrBase)"
+          release << "      #{a}PtrBase = 0;"
         end
-        release << "    #{name}PtrBase = 0;"
-        release << "  }\n"
+      end
+      release << "    #{name}PtrBase = 0;"
+      release << "  }\n"
 
-        code.release_arrays = release.join("\n") + code.release_arrays
+      code.release_arrays = release.join("\n") + code.release_arrays
                    
-        # store information about the arrays we have already handled
-        code.arrays << [name, basectype]
-      end
+      # store information about the arrays we have already handled
+      code.arrays << [name, basectype]
+    end
 
-      def make_call_arg
-        code.call_args << "#{name}Ptr"
-      end
+    def make_call_arg
+      code.call_args << "#{name}Ptr"
     end
+  end
 
-    #----------------------------------------------------------------------
-    # For complex values (scalars only!): fortran returns the value in the
-    # first argument, therefore declare the return value, modify the 
-    # fortran argument list, and generate the *Complex object.
-    #
-    # for arguments, extract the values from the Java object
-    class ComplexArgument < GenericArgument
-      def make_fortran_return_type
-        code.fortran_args << javatype + " *"
-        code.fortran_return_type << "void"
-      end
+  #----------------------------------------------------------------------
+  # For complex values (scalars only!): fortran returns the value in the
+  # first argument, therefore declare the return value, modify the
+  # fortran argument list, and generate the *Complex object.
+  #
+  # for arguments, extract the values from the Java object
+  class ComplexF2CArgument < GenericArgument
+    def make_fortran_return_type
+      code.fortran_args << javatype + " *"
+      code.fortran_return_type << "void"
+    end
 
-      def make_fortran_arg
-        code.fortran_args << javatype + " *"
-      end
+    def make_fortran_arg
+      code.fortran_args << javatype + " *"
+    end
 
-      def make_convert_return_type
-        code.conversions << "  #{javatype} retval;\n"
-      end
+    def make_convert_return_type
+      code.conversions << "  #{javatype} retval;\n"
+    end
 
-      def make_call_return
-        code.call_post << "\n  return create#{javatype}(env, &retval);"
-        code.call_args << "&retval"
-      end
+    def make_call_return
+      code.call_post << "\n  return create#{javatype}(env, &retval);"
+      code.call_args << "&retval"
+    end
 
-      def make_convert_arg
-        code.conversions << "  #{javatype} #{name}Cplx;\n"
-        code.conversions << "  get#{javatype}(env, #{name}, &#{name}Cplx);\n"
-      end
+    def make_convert_arg
+      code.conversions << "  #{javatype} #{name}Cplx;\n"
+      code.conversions << "  get#{javatype}(env, #{name}, &#{name}Cplx);\n"
+    end
 
-      def make_call_arg
-        code.call_args << "&#{name}Cplx"
-      end
+    def make_call_arg
+      code.call_args << "&#{name}Cplx"
     end
+  end
     
-    #----------------------------------------------------------------------
-    # For characters: This handles only the first byte. Now idea if it
-    # is really worth dealing with UTF-8 and all the rest...
-    class CharArgument < GenericArgument
-      def make_convert_arg
-        code.conversions << "  char #{name}Chr = (char) #{name};\n"
+  #----------------------------------------------------------------------
+  # For complex values (scalars only!) with the C99 calling convention:
+  # fortran returns the value directly as a C99 complex number, therefore
+  # declare that return type and generate the *Complex object from it.
+  #
+  # for arguments, extract the values from the Java object
+  class ComplexC99Argument < GenericArgument
+    def c99type
+      case javatype
+      when 'ComplexFloat'
+        'float complex'
+      when 'ComplexDouble'
+        'double complex'
       end
+    end
 
-      def make_call_arg
-        code.call_args << "&#{name}Chr"
-      end
+    def make_fortran_return_type
+      # code.fortran_args << javatype + " *"
+      code.fortran_return_type << c99type
+    end
 
-      def make_fortran_arg
-        code.fortran_args << javatype + " *"
-      end
+    def make_fortran_arg
+      code.fortran_args << c99type + " *"
+    end
+
+    def make_call_return
+      code.call_pre << c99type + " retval = "
+      code.call_post << "\n  return create#{javatype}(env, retval);"
+      #code.call_args << "&retval"
+    end
+
+    def make_convert_arg
+      code.conversions << "  #{c99type} #{name}Cplx;\n"
+      code.conversions << "  #{name}Cplx = get#{javatype}(env, #{name});\n"
+    end
+
+    def make_call_arg
+      code.call_args << "&#{name}Cplx"
+    end
+
+    def make_return_type
+      code.return_type = 'jobject'
+    end
+  end
+
+  #----------------------------------------------------------------------
+  # For characters: This handles only the first byte. No idea if it
+  # is really worth dealing with UTF-8 and all the rest...
+  class CharArgument < GenericArgument
+    def make_convert_arg
+      code.conversions << "  char #{name}Chr = (char) #{name};\n"
+    end
+
+    def make_call_arg
+      code.call_args << "&#{name}Chr"
+    end
+
+    def make_fortran_arg
+      code.fortran_args << javatype + " *"
     end
+  end
     
-    #----------------------------------------------------------------------
-    # For nil arguments (only return value): don't add a return value
-    class NilArgument < GenericArgument
-      def make_call_return
-      end
+  #----------------------------------------------------------------------
+  # For nil arguments (only return value): don't add a return value
+  class NilArgument < GenericArgument
+    def make_call_return
     end
+  end
 
-    #----------------------------------------------------------------------
-    # For info arguments
-    class InfoArgument < GenericArgument
-      def make_decl_arg
-        code.return_type = 'jint'
-      end
+  #----------------------------------------------------------------------
+  # For info arguments
+  class InfoArgument < GenericArgument
+    def make_decl_arg
+      code.return_type = 'jint'
+    end
 
-      def make_convert_arg
-        code.conversions << "  int info;\n"
-        code.call_post << "\n  return info;"
-      end
+    def make_convert_arg
+      code.conversions << "  int info;\n"
+      code.call_post << "\n  return info;"
+    end
 
-      def make_fortran_arg
-        code.fortran_args << ctype[1...-5] + " *"
-      end
+    def make_fortran_arg
+      code.fortran_args << ctype[1...-5] + " *"
     end
-  end # module Java
+  end
+end # module Java
 end # module Fortran
diff --git a/scripts/fortranwrapper.rb b/scripts/fortranwrapper.rb
index 7ebf48f..e3f4e69 100644
--- a/scripts/fortranwrapper.rb
+++ b/scripts/fortranwrapper.rb
@@ -59,11 +59,13 @@ FORTRANWRAPPER_DUMP = 'fortranwrapper.dump'
 
 $here = false
 $force = false
+$complexcc = 'c99'
 OptionParser.new do |opts|
   opts.banner = Banner
   
   opts.on("-h", "--here", "output files here") {|v| $here = true}
   opts.on("-f", "--force", "force parsing of fortran file") {|v| $force = true}
+  opts.on("-c", "--complexcc [TYPE]", "set complex calling convention (either c99 or f2c)") {|v| $complexcc = v}
 end.parse!
 
 if ARGV.size < 3
@@ -71,6 +73,8 @@ if ARGV.size < 3
   exit
 end
 
+puts "complex calling convention = #{$complexcc}"
+
 package = ARGV[0]
 klass = ARGV[1]
 
diff --git a/scripts/java-impl.c b/scripts/java-impl.c
index 76c0356..74b6ec4 100644
--- a/scripts/java-impl.c
+++ b/scripts/java-impl.c
@@ -40,8 +40,6 @@
  * For convenience, we define some typedefs here which have the
  * same name as certain Java types, but their implementation differs (of course)
  */
-typedef struct { float real, imag; } ComplexFloat;
-typedef struct { double real, imag; } ComplexDouble;
 typedef char *String;
 
 typedef char ByteBuffer;
@@ -64,6 +62,10 @@ static jobject createObject(JNIEnv *env, const char *className, const char *sign
   va_end(args);
 }
 
+<% if $complexcc == 'f2c' %>
+typedef struct { float real, imag; } ComplexFloat;
+typedef struct { double real, imag; } ComplexDouble;
+
 static jobject createComplexFloat(JNIEnv *env, ComplexFloat *fc)
 {
   return createObject(env, CORE_PACKAGE "ComplexFloat", "(FF)V", fc->real, fc->imag);
@@ -93,6 +95,38 @@ static void getComplexDouble(JNIEnv *env, jobject dc, ComplexDouble *result)
   result->real = (*env)->GetDoubleField(env, dc, reField);
   result->imag = (*env)->GetDoubleField(env, dc, imField);
 }
+<% else %>
+#include <complex.h>
+typedef float complex ComplexFloat;
+typedef double complex ComplexDouble;
+
+static jobject createComplexFloat(JNIEnv *env, ComplexFloat fc) {
+  return createObject(env, CORE_PACKAGE "ComplexFloat", "(FF)V", crealf(fc), cimagf(fc));
+}
+
+static jobject createComplexDouble(JNIEnv *env, ComplexDouble dc)
+{
+  return createObject(env, CORE_PACKAGE "ComplexDouble", "(DD)V", creal(dc), cimag(dc));
+}
+
+static ComplexFloat getComplexFloat(JNIEnv *env, jobject fc)
+{
+  jclass klass = (*env)->FindClass(env, CORE_PACKAGE "ComplexFloat");
+  jfieldID reField = (*env)->GetFieldID(env, klass, "r", "F");
+  jfieldID imField = (*env)->GetFieldID(env, klass, "i", "F");
+
+  return (*env)->GetFloatField(env, fc, reField) + I*(*env)->GetFloatField(env, fc, imField);
+}
+
+static ComplexDouble getComplexDouble(JNIEnv *env, jobject dc)
+{
+  jclass klass = (*env)->FindClass(env, CORE_PACKAGE "ComplexDouble");
+  jfieldID reField = (*env)->GetFieldID(env, klass, "r", "D");
+  jfieldID imField = (*env)->GetFieldID(env, klass, "i", "D");
+
+  return (*env)->GetDoubleField(env, dc, reField) + I*(*env)->GetDoubleField(env, dc, imField);
+}
+<% end %>
 
 static void throwIllegalArgumentException(JNIEnv *env, const char *message)
 {
diff --git a/scripts/rjpp.rb b/scripts/rjpp.rb
index 458aaf6..465c8cd 100644
--- a/scripts/rjpp.rb
+++ b/scripts/rjpp.rb
@@ -38,7 +38,7 @@
 #
 # Contrary to the "usual" cpp, commands will be expanded "in-place" in
 # an idempotent fashion (meaning that you can run rjpp on the same
-# file more than once.
+# file more than once.)
 #
 # Directives are either given as
 #
@@ -68,8 +68,10 @@ def doc(s)
   "/** " + s + " */"
 end
 
+filename = ARGV[0]
+
 #open file
-file = open(ARGV[0], 'r').read
+file = open(filename, 'r').read
 saved_file = file
 
 # remove existing expansions
diff --git a/src/org/jblas/ComplexDoubleMatrix.java b/src/org/jblas/ComplexDoubleMatrix.java
index 7d617ec..4c7fe85 100644
--- a/src/org/jblas/ComplexDoubleMatrix.java
+++ b/src/org/jblas/ComplexDoubleMatrix.java
@@ -687,7 +687,7 @@ public class ComplexDoubleMatrix {
 	public ComplexDoubleMatrix dup() {
 		ComplexDoubleMatrix out = new ComplexDoubleMatrix(rows, columns);
 
-                System.arraycopy(out.data, 0, data, 0, 2 * length);
+                JavaBlas.rcopy(2*length, data, 0, 1, out.data, 0, 1);
 		
 		return out;
 	}
@@ -708,7 +708,13 @@ public class ComplexDoubleMatrix {
 		return this;
 	}
 
-	public ComplexDoubleMatrix put(int rowIndex, int columnIndex, ComplexDouble value) {
+	public ComplexDoubleMatrix put(int rowIndex, int columnIndex, double realValue, double complexValue) {
+		data[2*index(rowIndex, columnIndex)] =  realValue;
+		data[2*index(rowIndex, columnIndex)+1] =  complexValue;
+		return this;
+	}
+
+        public ComplexDoubleMatrix put(int rowIndex, int columnIndex, ComplexDouble value) {
 		int i = 2*index(rowIndex, columnIndex);
 		data[i] = value.real(); data[i+1] = value.imag();
 		return this;
@@ -764,6 +770,12 @@ public class ComplexDoubleMatrix {
 		data[2*i] = v;
 		return this;
 	}
+
+        public ComplexDoubleMatrix put(int i, double r, double c) {
+            data[2*i] = r;
+            data[2*i+1] = c;
+            return this;
+        }
 	
 	public ComplexDoubleMatrix put(int i, ComplexDouble v) {
 		data[2*i] = v.real();
diff --git a/src/org/jblas/ComplexFloatMatrix.java b/src/org/jblas/ComplexFloatMatrix.java
index 0f2e6e6..9bb8a8f 100644
--- a/src/org/jblas/ComplexFloatMatrix.java
+++ b/src/org/jblas/ComplexFloatMatrix.java
@@ -687,7 +687,7 @@ public class ComplexFloatMatrix {
 	public ComplexFloatMatrix dup() {
 		ComplexFloatMatrix out = new ComplexFloatMatrix(rows, columns);
 
-                System.arraycopy(out.data, 0, data, 0, 2 * length);
+                JavaBlas.rcopy(2*length, data, 0, 1, out.data, 0, 1);
 		
 		return out;
 	}
@@ -708,7 +708,13 @@ public class ComplexFloatMatrix {
 		return this;
 	}
 
-	public ComplexFloatMatrix put(int rowIndex, int columnIndex, ComplexFloat value) {
+	public ComplexFloatMatrix put(int rowIndex, int columnIndex, float realValue, float complexValue) {
+		data[2*index(rowIndex, columnIndex)] =  realValue;
+		data[2*index(rowIndex, columnIndex)+1] =  complexValue;
+		return this;
+	}
+
+        public ComplexFloatMatrix put(int rowIndex, int columnIndex, ComplexFloat value) {
 		int i = 2*index(rowIndex, columnIndex);
 		data[i] = value.real(); data[i+1] = value.imag();
 		return this;
@@ -764,6 +770,12 @@ public class ComplexFloatMatrix {
 		data[2*i] = v;
 		return this;
 	}
+
+        public ComplexFloatMatrix put(int i, float r, float c) {
+            data[2*i] = r;
+            data[2*i+1] = c;
+            return this;
+        }
 	
 	public ComplexFloatMatrix put(int i, ComplexFloat v) {
 		data[2*i] = v.real();
diff --git a/src/org/jblas/ConvertsToDoubleMatrix.java b/src/org/jblas/ConvertsToDoubleMatrix.java
index 890fbd2..4cb189f 100644
--- a/src/org/jblas/ConvertsToDoubleMatrix.java
+++ b/src/org/jblas/ConvertsToDoubleMatrix.java
@@ -6,8 +6,9 @@
 package org.jblas;
 
 /**
- *
- * @author mikio
+ * EXPERIMENTAL, not yet used for anything useful.
+ * 
+ * @author Mikio L. Braun
  */
 public interface ConvertsToDoubleMatrix {
     public DoubleMatrix convertToDoubleMatrix();
diff --git a/src/org/jblas/Decompose.java b/src/org/jblas/Decompose.java
index 27375e6..a3ada26 100644
--- a/src/org/jblas/Decompose.java
+++ b/src/org/jblas/Decompose.java
@@ -14,6 +14,7 @@ import static org.jblas.util.Functions.min;
  */
 public class Decompose {
 
+//STOP
     /**
      * Class to hold an LU decomposition result.
      *
@@ -33,6 +34,7 @@ public class Decompose {
             this.p = p;
         }
     }
+//START
 
     /**
      * Compute LU Decomposition of a general matrix.
diff --git a/src/org/jblas/DoubleMatrix.java b/src/org/jblas/DoubleMatrix.java
index 7d404b8..03ba9d3 100644
--- a/src/org/jblas/DoubleMatrix.java
+++ b/src/org/jblas/DoubleMatrix.java
@@ -288,8 +288,7 @@ public class DoubleMatrix implements Serializable {
     /** The actual data stored by rows (that is, row 0, row 1...). */
     public double[] data = null; // rows are contiguous
     public static final DoubleMatrix EMPTY = new DoubleMatrix();
-
-     static final long serialVersionUID = -1249281332731183060L;
+    static final long serialVersionUID = -1249281332731183060L;
 
     /**************************************************************************
      *
@@ -1027,6 +1026,7 @@ public class DoubleMatrix implements Serializable {
      * DoubleMatrix which has the same size and the maximal absolute
      * difference in matrix elements is smaller thatn 1e-6.
      */
+    @Override
     public boolean equals(Object o) {
         if (!(o instanceof DoubleMatrix)) {
             return false;
@@ -1043,6 +1043,15 @@ public class DoubleMatrix implements Serializable {
         return diff.max() / (rows * columns) < 1e-6;
     }
 
+    @Override
+    public int hashCode() {
+        int hash = 7;
+        hash = 83 * hash + this.rows;
+        hash = 83 * hash + this.columns;
+        hash = 83 * hash + Arrays.hashCode(this.data);
+        return hash;
+    }
+    
     /** Resize the matrix. All elements will be set to zero. */
     public void resize(int newRows, int newColumns) {
         rows = newRows;
@@ -1379,17 +1388,12 @@ public class DoubleMatrix implements Serializable {
         return array;
     }
 
-    /** Convert matrix to FloatMatrix. */
-    public FloatMatrix toFloatMatrix() {
-        FloatMatrix result = new FloatMatrix(rows, columns);
-
-        for (int c = 0; c < columns; c++) {
-            for (int r = 0; r < rows; r++) {
-                result.put(r, c, (float) get(r, c));
-            }
-        }
-
-        return result;
+    public FloatMatrix toFloat() {
+         FloatMatrix result = new FloatMatrix(rows, columns);
+         for (int i = 0; i < length; i++) {
+            result.put(i, (float) get(i));
+         }
+         return result;
     }
 
     /**
@@ -2307,6 +2311,10 @@ public class DoubleMatrix implements Serializable {
         return rowSums().divi(columns);
     }
 
+    /************************************************************************
+     * Column and rows access.
+     */
+
     /** Get a copy of a column. */
     public DoubleMatrix getColumn(int c) {
         return getColumn(c, new DoubleMatrix(rows, 1));
diff --git a/src/org/jblas/Eigen.java b/src/org/jblas/Eigen.java
index 8a40d7c..1eb9e09 100644
--- a/src/org/jblas/Eigen.java
+++ b/src/org/jblas/Eigen.java
@@ -112,6 +112,7 @@ public class Eigen {
             }
             return new ComplexDoubleMatrix[] { V, ComplexDoubleMatrix.diag(E) };
         }
+
 //BEGIN
   // The code below has been automatically generated.
   // DO NOT EDIT!
@@ -184,5 +185,6 @@ public class Eigen {
             }
             return new ComplexFloatMatrix[] { V, ComplexFloatMatrix.diag(E) };
         }
+
 //END
 }
diff --git a/src/org/jblas/FloatMatrix.java b/src/org/jblas/FloatMatrix.java
index 46a473f..6dac014 100644
--- a/src/org/jblas/FloatMatrix.java
+++ b/src/org/jblas/FloatMatrix.java
@@ -288,8 +288,7 @@ public class FloatMatrix implements Serializable {
     /** The actual data stored by rows (that is, row 0, row 1...). */
     public float[] data = null; // rows are contiguous
     public static final FloatMatrix EMPTY = new FloatMatrix();
-
-     static final long serialVersionUID = -1249281332731183060L;
+    static final long serialVersionUID = -1249281332731183060L;
 
     /**************************************************************************
      *
@@ -1027,6 +1026,7 @@ public class FloatMatrix implements Serializable {
      * FloatMatrix which has the same size and the maximal absolute
      * difference in matrix elements is smaller thatn 1e-6.
      */
+    @Override
     public boolean equals(Object o) {
         if (!(o instanceof FloatMatrix)) {
             return false;
@@ -1043,6 +1043,15 @@ public class FloatMatrix implements Serializable {
         return diff.max() / (rows * columns) < 1e-6;
     }
 
+    @Override
+    public int hashCode() {
+        int hash = 7;
+        hash = 83 * hash + this.rows;
+        hash = 83 * hash + this.columns;
+        hash = 83 * hash + Arrays.hashCode(this.data);
+        return hash;
+    }
+    
     /** Resize the matrix. All elements will be set to zero. */
     public void resize(int newRows, int newColumns) {
         rows = newRows;
@@ -1379,17 +1388,12 @@ public class FloatMatrix implements Serializable {
         return array;
     }
 
-    /** Convert matrix to FloatMatrix. */
-    public FloatMatrix toFloatMatrix() {
-        FloatMatrix result = new FloatMatrix(rows, columns);
-
-        for (int c = 0; c < columns; c++) {
-            for (int r = 0; r < rows; r++) {
-                result.put(r, c, (float) get(r, c));
-            }
-        }
-
-        return result;
+    public FloatMatrix toFloat() {
+         FloatMatrix result = new FloatMatrix(rows, columns);
+         for (int i = 0; i < length; i++) {
+            result.put(i, (float) get(i));
+         }
+         return result;
     }
 
     /**
@@ -2307,6 +2311,10 @@ public class FloatMatrix implements Serializable {
         return rowSums().divi(columns);
     }
 
+    /************************************************************************
+     * Column and rows access.
+     */
+
     /** Get a copy of a column. */
     public FloatMatrix getColumn(int c) {
         return getColumn(c, new FloatMatrix(rows, 1));
diff --git a/src/org/jblas/NativeBlas.java b/src/org/jblas/NativeBlas.java
index f577055..7dd6e00 100644
--- a/src/org/jblas/NativeBlas.java
+++ b/src/org/jblas/NativeBlas.java
@@ -89,152 +89,56 @@ public class NativeBlas {
     private static float[] floatDummy = new float[1];
 
      
-  public static native void caxpy(int n, ComplexFloat ca, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
   public static native void ccopy(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
-  public static native ComplexFloat cdotc(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
-  public static native ComplexFloat cdotu(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
-  public static native void cgbmv(char trans, int m, int n, int kl, int ku, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
-  public static native void cgemm(char transa, char transb, int m, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, ComplexFloat beta, float[] c, int cIdx, int ldc);
-  public static native void cgemv(char trans, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
-  public static native void cgerc(int m, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
-  public static native void cgeru(int m, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
-  public static native void chbmv(char uplo, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
-  public static native void chemm(char side, char uplo, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, ComplexFloat beta, float[] c, int cIdx, int ldc);
-  public static native void chemv(char uplo, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
-  public static native void cher2(char uplo, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
-  public static native void cher2k(char uplo, char trans, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, float beta, float[] c, int cIdx, int ldc);
-  public static native void cher(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] a, int aIdx, int lda);
-  public static native void cherk(char uplo, char trans, int n, int k, float alpha, float[] a, int aIdx, int lda, float beta, float[] c, int cIdx, int ldc);
-  public static native void chpmv(char uplo, int n, ComplexFloat alpha, float[] ap, int apIdx, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
-  public static native void chpr2(char uplo, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] ap, int apIdx);
-  public static native void chpr(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] ap, int apIdx);
-  public static native void crotg(ComplexFloat ca, ComplexFloat cb, float c, ComplexFloat s);
-  public static native void cscal(int n, ComplexFloat ca, float[] cx, int cxIdx, int incx);
-  public static native void csrot(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy, float c, float s);
-  public static native void csscal(int n, float sa, float[] cx, int cxIdx, int incx);
+  public static native void dcopy(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
+  public static native void scopy(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
+  public static native void zcopy(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
   public static native void cswap(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
-  public static native void csymm(char side, char uplo, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, ComplexFloat beta, float[] c, int cIdx, int ldc);
-  public static native void csyr2k(char uplo, char trans, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, ComplexFloat beta, float[] c, int cIdx, int ldc);
-  public static native void csyrk(char uplo, char trans, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, ComplexFloat beta, float[] c, int cIdx, int ldc);
-  public static native void ctbmv(char uplo, char trans, char diag, int n, int k, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void ctbsv(char uplo, char trans, char diag, int n, int k, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void ctpmv(char uplo, char trans, char diag, int n, float[] ap, int apIdx, float[] x, int xIdx, int incx);
-  public static native void ctpsv(char uplo, char trans, char diag, int n, float[] ap, int apIdx, float[] x, int xIdx, int incx);
-  public static native void ctrmm(char side, char uplo, char transa, char diag, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb);
-  public static native void ctrmv(char uplo, char trans, char diag, int n, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void ctrsm(char side, char uplo, char transa, char diag, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb);
-  public static native void ctrsv(char uplo, char trans, char diag, int n, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native double dasum(int n, double[] dx, int dxIdx, int incx);
+  public static native void dswap(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
+  public static native void sswap(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
+  public static native void zswap(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
+  public static native void caxpy(int n, ComplexFloat ca, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
   public static native void daxpy(int n, double da, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
-  public static native double dcabs1(ComplexDouble z);
-  public static native void dcopy(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
+  public static native void saxpy(int n, float sa, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
+  public static native void zaxpy(int n, ComplexDouble za, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
+  public static native void cscal(int n, ComplexFloat ca, float[] cx, int cxIdx, int incx);
+  public static native void dscal(int n, double da, double[] dx, int dxIdx, int incx);
+  public static native void sscal(int n, float sa, float[] sx, int sxIdx, int incx);
+  public static native void zscal(int n, ComplexDouble za, double[] zx, int zxIdx, int incx);
+  public static native void csscal(int n, float sa, float[] cx, int cxIdx, int incx);
+  public static native void zdscal(int n, double da, double[] zx, int zxIdx, int incx);
+  public static native ComplexFloat cdotc(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
+  public static native ComplexFloat cdotu(int n, float[] cx, int cxIdx, int incx, float[] cy, int cyIdx, int incy);
   public static native double ddot(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
-  public static native void dgbmv(char trans, int m, int n, int kl, int ku, double alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
-  public static native void dgemm(char transa, char transb, int m, int n, int k, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, double beta, double[] c, int cIdx, int ldc);
-  public static native void dgemv(char trans, int m, int n, double alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
-  public static native void dger(int m, int n, double alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
+  public static native float sdot(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
+  public static native ComplexDouble zdotc(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
+  public static native ComplexDouble zdotu(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
   public static native double dnrm2(int n, double[] x, int xIdx, int incx);
-  public static native void drot(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy, double c, double s);
-  public static native void drotg(double da, double db, double c, double s);
-  public static native void drotm(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy, double[] dparam, int dparamIdx);
-  public static native void drotmg(double[] dd1, int dd1Idx, double[] dd2, int dd2Idx, double[] dx1, int dx1Idx, double dy1, double[] dparam, int dparamIdx);
-  public static native void dsbmv(char uplo, int n, int k, double alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
-  public static native void dscal(int n, double da, double[] dx, int dxIdx, int incx);
-  public static native void dspmv(char uplo, int n, double alpha, double[] ap, int apIdx, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
-  public static native void dspr2(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] ap, int apIdx);
-  public static native void dspr(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] ap, int apIdx);
-  public static native void dswap(int n, double[] dx, int dxIdx, int incx, double[] dy, int dyIdx, int incy);
-  public static native void dsymm(char side, char uplo, int m, int n, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, double beta, double[] c, int cIdx, int ldc);
-  public static native void dsymv(char uplo, int n, double alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
-  public static native void dsyr2(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
-  public static native void dsyr2k(char uplo, char trans, int n, int k, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, double beta, double[] c, int cIdx, int ldc);
-  public static native void dsyr(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] a, int aIdx, int lda);
-  public static native void dsyrk(char uplo, char trans, int n, int k, double alpha, double[] a, int aIdx, int lda, double beta, double[] c, int cIdx, int ldc);
-  public static native void dtbmv(char uplo, char trans, char diag, int n, int k, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void dtbsv(char uplo, char trans, char diag, int n, int k, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void dtpmv(char uplo, char trans, char diag, int n, double[] ap, int apIdx, double[] x, int xIdx, int incx);
-  public static native void dtpsv(char uplo, char trans, char diag, int n, double[] ap, int apIdx, double[] x, int xIdx, int incx);
-  public static native void dtrmm(char side, char uplo, char transa, char diag, int m, int n, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb);
-  public static native void dtrmv(char uplo, char trans, char diag, int n, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void dtrsm(char side, char uplo, char transa, char diag, int m, int n, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb);
-  public static native void dtrsv(char uplo, char trans, char diag, int n, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native double dzasum(int n, double[] zx, int zxIdx, int incx);
   public static native double dznrm2(int n, double[] x, int xIdx, int incx);
+  public static native float scnrm2(int n, float[] x, int xIdx, int incx);
+  public static native float snrm2(int n, float[] x, int xIdx, int incx);
+  public static native double dasum(int n, double[] dx, int dxIdx, int incx);
+  public static native double dzasum(int n, double[] zx, int zxIdx, int incx);
+  public static native float sasum(int n, float[] sx, int sxIdx, int incx);
+  public static native float scasum(int n, float[] cx, int cxIdx, int incx);
   public static native int icamax(int n, float[] cx, int cxIdx, int incx);
   public static native int idamax(int n, double[] dx, int dxIdx, int incx);
   public static native int isamax(int n, float[] sx, int sxIdx, int incx);
   public static native int izamax(int n, double[] zx, int zxIdx, int incx);
-  public static native int lsame(char ca, char cb);
-  public static native float sasum(int n, float[] sx, int sxIdx, int incx);
-  public static native void saxpy(int n, float sa, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
-  public static native float scasum(int n, float[] cx, int cxIdx, int incx);
-  public static native float scnrm2(int n, float[] x, int xIdx, int incx);
-  public static native void scopy(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
-  public static native float sdot(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
-  public static native void sgbmv(char trans, int m, int n, int kl, int ku, float alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, float beta, float[] y, int yIdx, int incy);
-  public static native void sgemm(char transa, char transb, int m, int n, int k, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, float beta, float[] c, int cIdx, int ldc);
+  public static native void cgemv(char trans, int m, int n, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, ComplexFloat beta, float[] y, int yIdx, int incy);
+  public static native void dgemv(char trans, int m, int n, double alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, double beta, double[] y, int yIdx, int incy);
   public static native void sgemv(char trans, int m, int n, float alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, float beta, float[] y, int yIdx, int incy);
-  public static native void sger(int m, int n, float alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
-  public static native float snrm2(int n, float[] x, int xIdx, int incx);
-  public static native void srot(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy, float c, float s);
-  public static native void srotg(float sa, float sb, float c, float s);
-  public static native void srotm(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy, float[] sparam, int sparamIdx);
-  public static native void srotmg(float[] sd1, int sd1Idx, float[] sd2, int sd2Idx, float[] sx1, int sx1Idx, float sy1, float[] sparam, int sparamIdx);
-  public static native void ssbmv(char uplo, int n, int k, float alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, float beta, float[] y, int yIdx, int incy);
-  public static native void sscal(int n, float sa, float[] sx, int sxIdx, int incx);
-  public static native void sspmv(char uplo, int n, float alpha, float[] ap, int apIdx, float[] x, int xIdx, int incx, float beta, float[] y, int yIdx, int incy);
-  public static native void sspr2(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] ap, int apIdx);
-  public static native void sspr(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] ap, int apIdx);
-  public static native void sswap(int n, float[] sx, int sxIdx, int incx, float[] sy, int syIdx, int incy);
-  public static native void ssymm(char side, char uplo, int m, int n, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, float beta, float[] c, int cIdx, int ldc);
-  public static native void ssymv(char uplo, int n, float alpha, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx, float beta, float[] y, int yIdx, int incy);
-  public static native void ssyr2(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
-  public static native void ssyr2k(char uplo, char trans, int n, int k, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, float beta, float[] c, int cIdx, int ldc);
-  public static native void ssyr(char uplo, int n, float alpha, float[] x, int xIdx, int incx, float[] a, int aIdx, int lda);
-  public static native void ssyrk(char uplo, char trans, int n, int k, float alpha, float[] a, int aIdx, int lda, float beta, float[] c, int cIdx, int ldc);
-  public static native void stbmv(char uplo, char trans, char diag, int n, int k, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void stbsv(char uplo, char trans, char diag, int n, int k, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void stpmv(char uplo, char trans, char diag, int n, float[] ap, int apIdx, float[] x, int xIdx, int incx);
-  public static native void stpsv(char uplo, char trans, char diag, int n, float[] ap, int apIdx, float[] x, int xIdx, int incx);
-  public static native void strmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb);
-  public static native void strmv(char uplo, char trans, char diag, int n, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void strsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb);
-  public static native void strsv(char uplo, char trans, char diag, int n, float[] a, int aIdx, int lda, float[] x, int xIdx, int incx);
-  public static native void zaxpy(int n, ComplexDouble za, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
-  public static native void zcopy(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
-  public static native ComplexDouble zdotc(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
-  public static native ComplexDouble zdotu(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
-  public static native void zdrot(int n, double[] cx, int cxIdx, int incx, double[] cy, int cyIdx, int incy, double c, double s);
-  public static native void zdscal(int n, double da, double[] zx, int zxIdx, int incx);
-  public static native void zgbmv(char trans, int m, int n, int kl, int ku, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, ComplexDouble beta, double[] y, int yIdx, int incy);
-  public static native void zgemm(char transa, char transb, int m, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, ComplexDouble beta, double[] c, int cIdx, int ldc);
   public static native void zgemv(char trans, int m, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, ComplexDouble beta, double[] y, int yIdx, int incy);
+  public static native void cgerc(int m, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
+  public static native void cgeru(int m, int n, ComplexFloat alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
+  public static native void dger(int m, int n, double alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
+  public static native void sger(int m, int n, float alpha, float[] x, int xIdx, int incx, float[] y, int yIdx, int incy, float[] a, int aIdx, int lda);
   public static native void zgerc(int m, int n, ComplexDouble alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
   public static native void zgeru(int m, int n, ComplexDouble alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
-  public static native void zhbmv(char uplo, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, ComplexDouble beta, double[] y, int yIdx, int incy);
-  public static native void zhemm(char side, char uplo, int m, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, ComplexDouble beta, double[] c, int cIdx, int ldc);
-  public static native void zhemv(char uplo, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx, ComplexDouble beta, double[] y, int yIdx, int incy);
-  public static native void zher2(char uplo, int n, ComplexDouble alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] a, int aIdx, int lda);
-  public static native void zher2k(char uplo, char trans, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, double beta, double[] c, int cIdx, int ldc);
-  public static native void zher(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] a, int aIdx, int lda);
-  public static native void zherk(char uplo, char trans, int n, int k, double alpha, double[] a, int aIdx, int lda, double beta, double[] c, int cIdx, int ldc);
-  public static native void zhpmv(char uplo, int n, ComplexDouble alpha, double[] ap, int apIdx, double[] x, int xIdx, int incx, ComplexDouble beta, double[] y, int yIdx, int incy);
-  public static native void zhpr2(char uplo, int n, ComplexDouble alpha, double[] x, int xIdx, int incx, double[] y, int yIdx, int incy, double[] ap, int apIdx);
-  public static native void zhpr(char uplo, int n, double alpha, double[] x, int xIdx, int incx, double[] ap, int apIdx);
-  public static native void zrotg(ComplexDouble ca, ComplexDouble cb, double c, ComplexDouble s);
-  public static native void zscal(int n, ComplexDouble za, double[] zx, int zxIdx, int incx);
-  public static native void zswap(int n, double[] zx, int zxIdx, int incx, double[] zy, int zyIdx, int incy);
-  public static native void zsymm(char side, char uplo, int m, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, ComplexDouble beta, double[] c, int cIdx, int ldc);
-  public static native void zsyr2k(char uplo, char trans, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, ComplexDouble beta, double[] c, int cIdx, int ldc);
-  public static native void zsyrk(char uplo, char trans, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, ComplexDouble beta, double[] c, int cIdx, int ldc);
-  public static native void ztbmv(char uplo, char trans, char diag, int n, int k, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void ztbsv(char uplo, char trans, char diag, int n, int k, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void ztpmv(char uplo, char trans, char diag, int n, double[] ap, int apIdx, double[] x, int xIdx, int incx);
-  public static native void ztpsv(char uplo, char trans, char diag, int n, double[] ap, int apIdx, double[] x, int xIdx, int incx);
-  public static native void ztrmm(char side, char uplo, char transa, char diag, int m, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb);
-  public static native void ztrmv(char uplo, char trans, char diag, int n, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
-  public static native void ztrsm(char side, char uplo, char transa, char diag, int m, int n, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb);
-  public static native void ztrsv(char uplo, char trans, char diag, int n, double[] a, int aIdx, int lda, double[] x, int xIdx, int incx);
+  public static native void cgemm(char transa, char transb, int m, int n, int k, ComplexFloat alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, ComplexFloat beta, float[] c, int cIdx, int ldc);
+  public static native void dgemm(char transa, char transb, int m, int n, int k, double alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, double beta, double[] c, int cIdx, int ldc);
+  public static native void sgemm(char transa, char transb, int m, int n, int k, float alpha, float[] a, int aIdx, int lda, float[] b, int bIdx, int ldb, float beta, float[] c, int cIdx, int ldc);
+  public static native void zgemm(char transa, char transb, int m, int n, int k, ComplexDouble alpha, double[] a, int aIdx, int lda, double[] b, int bIdx, int ldb, ComplexDouble beta, double[] c, int cIdx, int ldc);
   public static native int dgesv(int n, int nrhs, double[] a, int aIdx, int lda, int[] ipiv, int ipivIdx, double[] b, int bIdx, int ldb);
   public static native int sgesv(int n, int nrhs, float[] a, int aIdx, int lda, int[] ipiv, int ipivIdx, float[] b, int bIdx, int ldb);
   public static native int dsysv(char uplo, int n, int nrhs, double[] a, int aIdx, int lda, int[] ipiv, int ipivIdx, double[] b, int bIdx, int ldb, double[] work, int workIdx, int lwork);
@@ -384,12 +288,12 @@ public class NativeBlas {
   public static native int cgeev(char jobvl, char jobvr, int n, float[] a, int aIdx, int lda, float[] w, int wIdx, float[] vl, int vlIdx, int ldvl, float[] vr, int vrIdx, int ldvr, float[] work, int workIdx, int lwork, float[] rwork, int rworkIdx);
   public static int cgeev(char jobvl, char jobvr, int n, float[] a, int aIdx, int lda, float[] w, int wIdx, float[] vl, int vlIdx, int ldvl, float[] vr, int vrIdx, int ldvr, float[] rwork, int rworkIdx) {
     int info;
-    float[] work = new float[1];
+    float[] work = new float[1*2];
     int lwork;
     info = cgeev(jobvl, jobvr, n, floatDummy, 0, lda, floatDummy, 0, floatDummy, 0, ldvl, floatDummy, 0, ldvr, work, 0, -1, floatDummy, 0);
     if (info != 0)
       return info;
-    lwork = (int) work[0]; work = new float[lwork];
+    lwork = (int) work[0]; work = new float[lwork*2];
     info = cgeev(jobvl, jobvr, n, a, aIdx, lda, w, wIdx, vl, vlIdx, ldvl, vr, vrIdx, ldvr, work, 0, lwork, rwork, rworkIdx);
     return info;
   }
@@ -423,12 +327,12 @@ public class NativeBlas {
   public static native int zgeev(char jobvl, char jobvr, int n, double[] a, int aIdx, int lda, double[] w, int wIdx, double[] vl, int vlIdx, int ldvl, double[] vr, int vrIdx, int ldvr, double[] work, int workIdx, int lwork, double[] rwork, int rworkIdx);
   public static int zgeev(char jobvl, char jobvr, int n, double[] a, int aIdx, int lda, double[] w, int wIdx, double[] vl, int vlIdx, int ldvl, double[] vr, int vrIdx, int ldvr, double[] rwork, int rworkIdx) {
     int info;
-    double[] work = new double[1];
+    double[] work = new double[1*2];
     int lwork;
     info = zgeev(jobvl, jobvr, n, doubleDummy, 0, lda, doubleDummy, 0, doubleDummy, 0, ldvl, doubleDummy, 0, ldvr, work, 0, -1, doubleDummy, 0);
     if (info != 0)
       return info;
-    lwork = (int) work[0]; work = new double[lwork];
+    lwork = (int) work[0]; work = new double[lwork*2];
     info = zgeev(jobvl, jobvr, n, a, aIdx, lda, w, wIdx, vl, vlIdx, ldvl, vr, vrIdx, ldvr, work, 0, lwork, rwork, rworkIdx);
     return info;
   }
@@ -437,5 +341,57 @@ public class NativeBlas {
   public static native int sgetrf(int m, int n, float[] a, int aIdx, int lda, int[] ipiv, int ipivIdx);
   public static native int dpotrf(char uplo, int n, double[] a, int aIdx, int lda);
   public static native int spotrf(char uplo, int n, float[] a, int aIdx, int lda);
+  public static native int cgesvd(char jobu, char jobvt, int m, int n, float[] a, int aIdx, int lda, float[] s, int sIdx, float[] u, int uIdx, int ldu, float[] vt, int vtIdx, int ldvt, float[] work, int workIdx, int lwork, float[] rwork, int rworkIdx);
+  public static int cgesvd(char jobu, char jobvt, int m, int n, float[] a, int aIdx, int lda, float[] s, int sIdx, float[] u, int uIdx, int ldu, float[] vt, int vtIdx, int ldvt, float[] rwork, int rworkIdx) {
+    int info;
+    float[] work = new float[1*2];
+    int lwork;
+    info = cgesvd(jobu, jobvt, m, n, floatDummy, 0, lda, floatDummy, 0, floatDummy, 0, ldu, floatDummy, 0, ldvt, work, 0, -1, floatDummy, 0);
+    if (info != 0)
+      return info;
+    lwork = (int) work[0]; work = new float[lwork*2];
+    info = cgesvd(jobu, jobvt, m, n, a, aIdx, lda, s, sIdx, u, uIdx, ldu, vt, vtIdx, ldvt, work, 0, lwork, rwork, rworkIdx);
+    return info;
+  }
+
+  public static native int dgesvd(char jobu, char jobvt, int m, int n, double[] a, int aIdx, int lda, double[] s, int sIdx, double[] u, int uIdx, int ldu, double[] vt, int vtIdx, int ldvt, double[] work, int workIdx, int lwork);
+  public static int dgesvd(char jobu, char jobvt, int m, int n, double[] a, int aIdx, int lda, double[] s, int sIdx, double[] u, int uIdx, int ldu, double[] vt, int vtIdx, int ldvt) {
+    int info;
+    double[] work = new double[1];
+    int lwork;
+    info = dgesvd(jobu, jobvt, m, n, doubleDummy, 0, lda, doubleDummy, 0, doubleDummy, 0, ldu, doubleDummy, 0, ldvt, work, 0, -1);
+    if (info != 0)
+      return info;
+    lwork = (int) work[0]; work = new double[lwork];
+    info = dgesvd(jobu, jobvt, m, n, a, aIdx, lda, s, sIdx, u, uIdx, ldu, vt, vtIdx, ldvt, work, 0, lwork);
+    return info;
+  }
+
+  public static native int sgesvd(char jobu, char jobvt, int m, int n, float[] a, int aIdx, int lda, float[] s, int sIdx, float[] u, int uIdx, int ldu, float[] vt, int vtIdx, int ldvt, float[] work, int workIdx, int lwork);
+  public static int sgesvd(char jobu, char jobvt, int m, int n, float[] a, int aIdx, int lda, float[] s, int sIdx, float[] u, int uIdx, int ldu, float[] vt, int vtIdx, int ldvt) {
+    int info;
+    float[] work = new float[1];
+    int lwork;
+    info = sgesvd(jobu, jobvt, m, n, floatDummy, 0, lda, floatDummy, 0, floatDummy, 0, ldu, floatDummy, 0, ldvt, work, 0, -1);
+    if (info != 0)
+      return info;
+    lwork = (int) work[0]; work = new float[lwork];
+    info = sgesvd(jobu, jobvt, m, n, a, aIdx, lda, s, sIdx, u, uIdx, ldu, vt, vtIdx, ldvt, work, 0, lwork);
+    return info;
+  }
+
+  public static native int zgesvd(char jobu, char jobvt, int m, int n, double[] a, int aIdx, int lda, double[] s, int sIdx, double[] u, int uIdx, int ldu, double[] vt, int vtIdx, int ldvt, double[] work, int workIdx, int lwork, double[] rwork, int rworkIdx);
+  public static int zgesvd(char jobu, char jobvt, int m, int n, double[] a, int aIdx, int lda, double[] s, int sIdx, double[] u, int uIdx, int ldu, double[] vt, int vtIdx, int ldvt, double[] rwork, int rworkIdx) {
+    int info;
+    double[] work = new double[1*2];
+    int lwork;
+    info = zgesvd(jobu, jobvt, m, n, doubleDummy, 0, lda, doubleDummy, 0, doubleDummy, 0, ldu, doubleDummy, 0, ldvt, work, 0, -1, doubleDummy, 0);
+    if (info != 0)
+      return info;
+    lwork = (int) work[0]; work = new double[lwork*2];
+    info = zgesvd(jobu, jobvt, m, n, a, aIdx, lda, s, sIdx, u, uIdx, ldu, vt, vtIdx, ldvt, work, 0, lwork, rwork, rworkIdx);
+    return info;
+  }
+
 
 }
diff --git a/src/org/jblas/Singular.java b/src/org/jblas/Singular.java
new file mode 100644
index 0000000..424eb12
--- /dev/null
+++ b/src/org/jblas/Singular.java
@@ -0,0 +1,193 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.jblas;
+
+import static org.jblas.util.Functions.min;
+
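+/**
+ * Singular value decompositions of real and complex matrices, computed through the native *gesvd routines.
+ */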
+public class Singular {
+
+    /**
+     * Computes the full singular value decomposition of a real matrix.
+     *
+     * @param A the m x n matrix to decompose; a copy of it is handed to the native routine
+     * @return A DoubleMatrix[3] array of U, S, V such that A = U * diag(S) * V'
+     */
+    public static DoubleMatrix[] fullSVD(DoubleMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final DoubleMatrix left = new DoubleMatrix(rows, rows);
+        final DoubleMatrix values = new DoubleMatrix(min(rows, cols));
+        final DoubleMatrix rightT = new DoubleMatrix(cols, cols);
+        // Job codes 'A'/'A' request square factors; the last matrix argument is transposed back below.
+        NativeBlas.dgesvd('A', 'A', rows, cols, A.dup().data, 0, rows, values.data, 0, left.data, 0, rows, rightT.data, 0, cols);
+        DoubleMatrix right = rightT.transpose();
+        return new DoubleMatrix[]{left, values, right};
+    }
+
+    /**
+     * Computes the reduced ("economy size") singular value decomposition of a real matrix.
+     * With k = min(m, n), U is allocated as m x k and V is returned as n x k, so neither
+     * factor is padded out to a square matrix. ("Sparse" here does not refer to sparse storage.)
+     *
+     * @param A the m x n matrix to decompose; a copy of it is handed to the native routine
+     * @return A DoubleMatrix[3] array of U, S, V such that A = U * diag(S) * V'
+     */
+    public static DoubleMatrix[] sparseSVD(DoubleMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final int k = min(rows, cols);
+        final DoubleMatrix left = new DoubleMatrix(rows, k);
+        final DoubleMatrix values = new DoubleMatrix(k);
+        final DoubleMatrix rightT = new DoubleMatrix(k, cols);
+        // Job codes 'S'/'S' request the reduced factors; the k x n result is transposed back below.
+        NativeBlas.dgesvd('S', 'S', rows, cols, A.dup().data, 0, rows, values.data, 0, left.data, 0, rows, rightT.data, 0, k);
+        DoubleMatrix right = rightT.transpose();
+        return new DoubleMatrix[]{left, values, right};
+    }
+
+    /** Reduced SVD of a complex matrix: returns {U, S, V} with A = U * diag(S) * V^H (V^H = conjugate transpose of V). */
+    public static ComplexDoubleMatrix[] sparseSVD(ComplexDoubleMatrix A) {
+        int m = A.rows;
+        int n = A.columns;
+        ComplexDoubleMatrix U = new ComplexDoubleMatrix(m, min(m, n));
+        DoubleMatrix S = new DoubleMatrix(min(m, n));
+        ComplexDoubleMatrix VH = new ComplexDoubleMatrix(min(m, n), n);
+        // Real-valued scratch array that zgesvd takes in addition to its complex workspace.
+        double[] rwork = new double[5*min(m,n)];
+        // LAPACK's zgesvd stores the conjugate transpose V^H in its VT argument, not the plain transpose.
+        NativeBlas.zgesvd('S', 'S', m, n, A.dup().data, 0, m, S.data, 0, U.data, 0, m, VH.data, 0, min(m, n), rwork, 0);
+        // Recover V by transposing AND conjugating; transpose() alone would hand back conj(V).
+        return new ComplexDoubleMatrix[]{U, new ComplexDoubleMatrix(S), VH.transpose().conji()};
+    }
+
+    /**
+     * Computes only the singular values of a real matrix; no singular vectors are requested.
+     *
+     * @param A DoubleMatrix of dimension m * n
+     * @return A min(m, n) vector of singular values.
+     */
+    public static DoubleMatrix SVDValues(DoubleMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final DoubleMatrix values = new DoubleMatrix(min(rows, cols));
+        // Job codes 'N'/'N': null is passed for the U and VT arrays, each with leading dimension 1.
+        // The status code returned by dgesvd is not inspected here.
+        NativeBlas.dgesvd('N', 'N', rows, cols, A.dup().data, 0, rows, values.data, 0, null, 0, 1, null, 0, 1);
+        return values;
+    }
+
+    /**
+     * Computes only the singular values of a complex matrix; no singular vectors are requested.
+     *
+     * @param A ComplexDoubleMatrix of dimension m * n
+     * @return A real-valued (!) min(m, n) vector of singular values.
+     */
+    public static DoubleMatrix SVDValues(ComplexDoubleMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final int k = min(rows, cols);
+        final DoubleMatrix values = new DoubleMatrix(k);
+        final double[] realWork = new double[5*k];
+        // Job codes 'N'/'N': null is passed for the U and VT arrays; the status code is not inspected.
+        NativeBlas.zgesvd('N', 'N', rows, cols, A.dup().data, 0, rows, values.data, 0, null, 0, 1, null, 0, k, realWork, 0);
+        return values;
+    }
+
+    //BEGIN
+  // The code below has been automatically generated.
+  // DO NOT EDIT!
+
+    /**
+     * Computes the full singular value decomposition of a real matrix.
+     *
+     * @param A the m x n matrix to decompose; a copy of it is handed to the native routine
+     * @return A FloatMatrix[3] array of U, S, V such that A = U * diag(S) * V'
+     */
+    public static FloatMatrix[] fullSVD(FloatMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final FloatMatrix left = new FloatMatrix(rows, rows);
+        final FloatMatrix values = new FloatMatrix(min(rows, cols));
+        final FloatMatrix rightT = new FloatMatrix(cols, cols);
+        // Job codes 'A'/'A' request square factors; the last matrix argument is transposed back below.
+        NativeBlas.sgesvd('A', 'A', rows, cols, A.dup().data, 0, rows, values.data, 0, left.data, 0, rows, rightT.data, 0, cols);
+        FloatMatrix right = rightT.transpose();
+        return new FloatMatrix[]{left, values, right};
+    }
+
+    /**
+     * Computes the reduced ("economy size") singular value decomposition of a real matrix.
+     * With k = min(m, n), U is allocated as m x k and V is returned as n x k, so neither
+     * factor is padded out to a square matrix. ("Sparse" here does not refer to sparse storage.)
+     *
+     * @param A the m x n matrix to decompose; a copy of it is handed to the native routine
+     * @return A FloatMatrix[3] array of U, S, V such that A = U * diag(S) * V'
+     */
+    public static FloatMatrix[] sparseSVD(FloatMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final int k = min(rows, cols);
+        final FloatMatrix left = new FloatMatrix(rows, k);
+        final FloatMatrix values = new FloatMatrix(k);
+        final FloatMatrix rightT = new FloatMatrix(k, cols);
+        // Job codes 'S'/'S' request the reduced factors; the k x n result is transposed back below.
+        NativeBlas.sgesvd('S', 'S', rows, cols, A.dup().data, 0, rows, values.data, 0, left.data, 0, rows, rightT.data, 0, k);
+        FloatMatrix right = rightT.transpose();
+        return new FloatMatrix[]{left, values, right};
+    }
+
+    /** Reduced SVD of a complex matrix: returns {U, S, V} with A = U * diag(S) * V^H (V^H = conjugate transpose of V). */
+    public static ComplexFloatMatrix[] sparseSVD(ComplexFloatMatrix A) {
+        int m = A.rows;
+        int n = A.columns;
+        ComplexFloatMatrix U = new ComplexFloatMatrix(m, min(m, n));
+        FloatMatrix S = new FloatMatrix(min(m, n));
+        ComplexFloatMatrix VH = new ComplexFloatMatrix(min(m, n), n);
+        // Real-valued scratch array that cgesvd takes in addition to its complex workspace.
+        float[] rwork = new float[5*min(m,n)];
+        // LAPACK's cgesvd stores the conjugate transpose V^H in its VT argument, not the plain transpose.
+        NativeBlas.cgesvd('S', 'S', m, n, A.dup().data, 0, m, S.data, 0, U.data, 0, m, VH.data, 0, min(m, n), rwork, 0);
+        // Recover V by transposing AND conjugating; transpose() alone would hand back conj(V).
+        return new ComplexFloatMatrix[]{U, new ComplexFloatMatrix(S), VH.transpose().conji()};
+    }
+
+    /**
+     * Computes only the singular values of a real matrix; no singular vectors are requested.
+     *
+     * @param A FloatMatrix of dimension m * n
+     * @return A min(m, n) vector of singular values.
+     */
+    public static FloatMatrix SVDValues(FloatMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final FloatMatrix values = new FloatMatrix(min(rows, cols));
+        // Job codes 'N'/'N': null is passed for the U and VT arrays, each with leading dimension 1.
+        // The status code returned by sgesvd is not inspected here.
+        NativeBlas.sgesvd('N', 'N', rows, cols, A.dup().data, 0, rows, values.data, 0, null, 0, 1, null, 0, 1);
+        return values;
+    }
+
+    /**
+     * Computes only the singular values of a complex matrix; no singular vectors are requested.
+     *
+     * @param A ComplexFloatMatrix of dimension m * n
+     * @return A real-valued (!) min(m, n) vector of singular values.
+     */
+    public static FloatMatrix SVDValues(ComplexFloatMatrix A) {
+        final int rows = A.rows;
+        final int cols = A.columns;
+        final int k = min(rows, cols);
+        final FloatMatrix values = new FloatMatrix(k);
+        final float[] realWork = new float[5*k];
+        // Job codes 'N'/'N': null is passed for the U and VT arrays; the status code is not inspected.
+        NativeBlas.cgesvd('N', 'N', rows, cols, A.dup().data, 0, rows, values.data, 0, null, 0, 1, null, 0, k, realWork, 0);
+        return values;
+    }
+
+    //END
+}
diff --git a/src/org/jblas/Solve.java b/src/org/jblas/Solve.java
index 076f6b2..a6ec241 100644
--- a/src/org/jblas/Solve.java
+++ b/src/org/jblas/Solve.java
@@ -40,7 +40,7 @@ package org.jblas;
  * Solving linear equations.
  */
 public class Solve {
-	/* Solves the linear equation A*X = B. */
+	/** Solves the linear equation A*X = B. */
 	public static DoubleMatrix solve(DoubleMatrix A, DoubleMatrix B) {
 		A.assertSquare();
 		DoubleMatrix X = B.dup();
@@ -49,7 +49,7 @@ public class Solve {
 		return X;
 	}
 
-	/* Solves the linear equation A*X = B for symmetric A. */
+	/** Solves the linear equation A*X = B for symmetric A. */
 	public static DoubleMatrix solveSymmetric(DoubleMatrix A, DoubleMatrix B) {
 		A.assertSquare();
 		DoubleMatrix X = B.dup();
@@ -59,7 +59,7 @@ public class Solve {
 	}
 
 	
-	/* Solves the linear equation A*X = B for symmetric and positive definite A. */
+	/** Solves the linear equation A*X = B for symmetric and positive definite A. */
 	public static DoubleMatrix solvePositive(DoubleMatrix A, DoubleMatrix B) {
 		A.assertSquare();
 		DoubleMatrix X = B.dup();
@@ -70,7 +70,7 @@ public class Solve {
 //BEGIN
   // The code below has been automatically generated.
   // DO NOT EDIT!
-	/* Solves the linear equation A*X = B. */
+	/** Solves the linear equation A*X = B. */
 	public static FloatMatrix solve(FloatMatrix A, FloatMatrix B) {
 		A.assertSquare();
 		FloatMatrix X = B.dup();
@@ -79,7 +79,7 @@ public class Solve {
 		return X;
 	}
 
-	/* Solves the linear equation A*X = B for symmetric A. */
+	/** Solves the linear equation A*X = B for symmetric A. */
 	public static FloatMatrix solveSymmetric(FloatMatrix A, FloatMatrix B) {
 		A.assertSquare();
 		FloatMatrix X = B.dup();
@@ -89,7 +89,7 @@ public class Solve {
 	}
 
 	
-	/* Solves the linear equation A*X = B for symmetric and positive definite A. */
+	/** Solves the linear equation A*X = B for symmetric and positive definite A. */
 	public static FloatMatrix solvePositive(FloatMatrix A, FloatMatrix B) {
 		A.assertSquare();
 		FloatMatrix X = B.dup();
diff --git a/src/org/jblas/benchmark/BenchmarkResult.java b/src/org/jblas/benchmark/BenchmarkResult.java
index e15585a..44fc14f 100644
--- a/src/org/jblas/benchmark/BenchmarkResult.java
+++ b/src/org/jblas/benchmark/BenchmarkResult.java
@@ -51,8 +51,8 @@ class BenchmarkResult {
     }
 
     void printResult() {
-        System.out.printf("%6.1f MFLOPS (%d iterations in %.1f seconds)%n",
-                numOps / duration / 1e6,
+        System.out.printf("%6.3f GFLOPS (%d iterations in %.1f seconds)%n",
+                numOps / duration / 1e9,
                 iterations,
                 duration);
     }
diff --git a/src/org/jblas/benchmark/JavaDoubleMultiplicationBenchmark.java b/src/org/jblas/benchmark/JavaDoubleMultiplicationBenchmark.java
index e970939..599c92d 100644
--- a/src/org/jblas/benchmark/JavaDoubleMultiplicationBenchmark.java
+++ b/src/org/jblas/benchmark/JavaDoubleMultiplicationBenchmark.java
@@ -59,7 +59,7 @@ class JavaDoubleMultiplicationBenchmark implements Benchmark {
                 int kn = k * n;
                 double bkjn = B[k + jn];
                 for (int i = 0; i < n; i++) {
-                    C[i + jn] += A[i + kn] + bkjn;
+                    C[i + jn] += A[i + kn] * bkjn;
                 }
             }
         }
diff --git a/src/org/jblas/benchmark/JavaFloatMultiplicationBenchmark.java b/src/org/jblas/benchmark/JavaFloatMultiplicationBenchmark.java
index 2821b1f..251345c 100644
--- a/src/org/jblas/benchmark/JavaFloatMultiplicationBenchmark.java
+++ b/src/org/jblas/benchmark/JavaFloatMultiplicationBenchmark.java
@@ -59,7 +59,7 @@ class JavaFloatMultiplicationBenchmark implements Benchmark {
                 int kn = k * n;
                 float bkjn = B[k + jn];
                 for (int i = 0; i < n; i++) {
-                    C[i + jn] += A[i + kn] + bkjn;
+                    C[i + jn] += A[i + kn] * bkjn;
                 }
             }
         }
diff --git a/src/org/jblas/benchmark/Main.java b/src/org/jblas/benchmark/Main.java
index 46ca5b9..a3e98c7 100644
--- a/src/org/jblas/benchmark/Main.java
+++ b/src/org/jblas/benchmark/Main.java
@@ -61,7 +61,8 @@ public class Main {
                 + "%n"
                 + "  --arch-flavor=value     overriding arch flavor (e.g. --arch-flavor=sse2)%n"
                 + "  --skip-java             don't run java benchmarks%n"
-                + "  --help                  show this help%n");
+                + "  --help                  show this help%n"
+                + "  --debug                 set config levels to debug%n");
     }
 
     public static void main(String[] args) {
@@ -100,24 +101,15 @@ public class Main {
             return;
         }
 
-        out.println(
-                "Simple benchmark for jblas");
+        out.println("Simple benchmark for jblas");
         out.println();
 
-        out.println(
-                "Running sanity benchmarks.");
+        out.println("Running sanity benchmarks.");
         out.println();
         org.jblas.util.SanityChecks.main(args);
         out.println();
 
-        out.println(
-                "Each benchmark will take about 5 seconds...");
-
-
-
-
-
-
+        out.println("Each benchmark will take about 5 seconds...");
 
         for (Benchmark b : multiplicationBenchmarks) {
             if (skipJava) {
diff --git a/src/org/jblas/util/LibraryLoader.java b/src/org/jblas/util/LibraryLoader.java
index 768f3c6..b5862f4 100644
--- a/src/org/jblas/util/LibraryLoader.java
+++ b/src/org/jblas/util/LibraryLoader.java
@@ -43,6 +43,14 @@ import java.io.*;
  */
 public class LibraryLoader {
 
+    private Logger logger;
+    private String libpath;
+    
+    public LibraryLoader() {
+        this.logger = Logger.getLogger();
+        this.libpath = null; // filled in by findLibrary() once a matching resource has been found
+    }
+
     /**
      * <p>Find the library <tt>libname</tt> as a resource, copy it to a tempfile
      * and load it using System.load(). The name of the library has to be the
@@ -60,67 +68,89 @@ public class LibraryLoader {
      * @throws UnsatisfiedLinkError if library cannot be founds
      */
     public void loadLibrary(String libname, boolean withFlavor) {
-        String libpath;
-        Logger logger = Logger.getLogger();
+        // preload flavor libraries
+        String flavor = null;
+        if (withFlavor) {
+            logger.debug("Preloading ArchFlavor library.");
+            flavor = ArchFlavor.archFlavor();
+        }
 
         libname = System.mapLibraryName(libname);
+        logger.debug("Attempting to load \"" + libname + "\".");
 
-        // We're in a static initializer and need a class. What shall we do?
-        Class cl = getClass();
+        String[] paths = {
+            "/",
+            "/bin/",
+            fatJarLibraryPath("static", flavor),
+            fatJarLibraryPathNonUnified("static", flavor),
+            fatJarLibraryPath("dynamic", flavor),
+            fatJarLibraryPathNonUnified("dynamic", flavor),
+        };
 
-        // Trying to copy from here.
-        logger.debug("Trying to copy from /" + libname + ".");
-        libpath = "/" + libname;
-        InputStream is = cl.getResourceAsStream("/" + libname);
+        InputStream is = findLibrary(paths, libname);
 
-        // Trying to copy from "bin"
+        // Oh man, have to get out of here!
         if (is == null) {
-            logger.debug("Trying to copy from /bin/" + libname + ".");
-            libpath = "/bin/" + libname;
-            is = cl.getResourceAsStream(libpath);
+            throw new UnsatisfiedLinkError("Couldn't find the resource " + libname + ".");
         }
 
-        // Trying to extract static version from the jar file. Why the static version?
-        // Because it is more likely to run.
-        if (is == null) {
-            libpath = fatJarLibraryPath("static");
-            logger.debug("Trying to copy from " + libpath + ".");
-            is = cl.getResourceAsStream(libpath + libname);
-        }
+        logger.config("Loading " + libname + " from " + libpath + ".");
+        loadLibraryFromStream(libname, is);
+    }
 
-        // Finally, let's see if we can get the dynamic version.
-        if (is == null) {
-            libpath = fatJarLibraryPath("dynamic");
-            logger.debug("Trying to copy from " + libpath + ".");
-            is = cl.getResourceAsStream(libpath + libname);
+    private InputStream findLibrary(String[] paths, String libname) {
+        InputStream is = null;
+        for (String path: paths) {
+            is = tryPath(path + libname);
+            if (is != null) {
+                libpath = path;
+                break;
+            }
         }
+        return is;
+    }
 
-        // And then we do it again for the "Non-Unified" path name.
-        // The reason is that changes in the build process might lead to actually
-        // having "Windows Vista" or something in the path... .
-        if (is == null) {
-            libpath = fatJarLibraryPathNonUnified("static");
-            if (withFlavor) libpath = addFlavor(libpath);
-            logger.debug("Trying to copy from " + libpath + ".");
-            is = cl.getResourceAsStream(libpath + libname);
+    /** Translate all those Windows to "Windows". ("Windows XP", "Windows Vista", "Windows 7", etc.) */
+    private String unifyOSName(String osname) {
+        if (osname.startsWith("Windows")) {
+            return "Windows";
         }
+        return osname;
+    }
 
-        // Finally, let's see if we can the static version with the unified
-        // path name.
-        if (is == null) {
-            libpath = fatJarLibraryPath("static");
-            if (withFlavor) libpath = addFlavor(libpath);
-            logger.debug("Trying to copy from " + libpath + ".");
-            is = cl.getResourceAsStream(libpath + libname);
-        }
+    /** Builds the resource directory "/lib/" + linkage + "/" + unified os.name + "/" + os.arch + "/",
+     * followed by flavor + "/" when a flavor is given. The library file name is appended by the caller. */
+    private String fatJarLibraryPath(String linkage, String flavor) {
+        // The separator is always "/", not the platform's file.separator.
+        StringBuilder dir = new StringBuilder("/lib/").append(linkage).append('/');
+        dir.append(unifyOSName(System.getProperty("os.name"))).append('/');
+        dir.append(System.getProperty("os.arch")).append('/');
+        if (flavor != null)
+            dir.append(flavor).append('/');
+        return dir.toString();
+    }
 
-        // Oh man, have to get out of here!
-        if (is == null) {
-            throw new UnsatisfiedLinkError("Couldn't find the resource " + libname + ".");
-        }
+    /** Same layout as fatJarLibraryPath, but with the raw os.name value (e.g. "Windows 7") in the path. */
+    private String fatJarLibraryPathNonUnified(String linkage, String flavor) {
+        // The separator is always "/", not the platform's file.separator.
+        StringBuilder dir = new StringBuilder("/lib/").append(linkage).append('/');
+        dir.append(System.getProperty("os.name")).append('/');
+        dir.append(System.getProperty("os.arch")).append('/');
+        if (flavor != null)
+            dir.append(flavor).append('/');
+        return dir.toString();
+    }
 
-        logger.config("Loading " + libname + " from " + libpath + ".");
+    /** Try to open the classpath resource at the given path; yields null when no such resource exists. */
+    private InputStream tryPath(String path) {
+        logger.debug("Trying path \"" + path + "\".");
+        return getClass().getResourceAsStream(path);
+    }
 
+    /** Load a system library from a stream. Copies the library to a temp file
+     * and loads from there.
+     */
+    private void loadLibraryFromStream(String libname, InputStream is) {
         try {
             File tempfile = File.createTempFile("jblas", libname);
             tempfile.deleteOnExit();
@@ -148,37 +178,4 @@ public class LibraryLoader {
             logger.error("Couldn't load copied link file: " + ule.toString() + ".\n");
         }
     }
-
-    static public String unifyOSName(String osname) {
-        if (osname.startsWith("Windows")) {
-            return "Windows";
-        }
-        return osname;
-    }
-
-    /** Compute the path to the library. The path is basically
-    "/" + os.name + "/" + os.arch + "/" + libname. */
-    static public String fatJarLibraryPath(String linkage) {
-        String sep = "/"; //System.getProperty("file.separator");
-        String os_name = unifyOSName(System.getProperty("os.name"));
-        String os_arch = System.getProperty("os.arch");
-        String path = sep + "lib" + sep + linkage + sep + os_name + sep + os_arch + sep;
-        return path;
-    }
-
-    static public String fatJarLibraryPathNonUnified(String linkage) {
-        String sep = "/"; //System.getProperty("file.separator");
-        String os_name = System.getProperty("os.name");
-        String os_arch = System.getProperty("os.arch");
-        String path = sep + "lib" + sep + linkage + sep + os_name + sep + os_arch + sep;
-        return path;
-    }
-
-    static private String addFlavor(String path) {
-        String sep = "/";
-        String arch_flavor = ArchFlavor.archFlavor();
-        if (arch_flavor != null)
-            path += arch_flavor + sep;
-        return path;
-    }
 }
diff --git a/src/org/jblas/util/SanityChecks.java b/src/org/jblas/util/SanityChecks.java
index 72f7da5..a64007b 100644
--- a/src/org/jblas/util/SanityChecks.java
+++ b/src/org/jblas/util/SanityChecks.java
@@ -33,9 +33,10 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 // --- END LICENSE BLOCK ---
-
 package org.jblas.util;
 
+import org.jblas.ComplexDouble;
+import org.jblas.ComplexDoubleMatrix;
 import org.jblas.NativeBlas;
 import org.jblas.DoubleMatrix;
 
@@ -97,6 +98,7 @@ public class SanityChecks {
      */
     public static void checkXerbla() {
         double[] x = new double[9];
+        System.out.println("Check whether we're catching XERBLA errors. If you see something like \"** On entry to DGEMM  parameter number  4 had an illegal value\", it didn't work!");
         try {
             NativeBlas.dgemm('N', 'N', 3, -1, 3, 1.0, x, 0, 3, x, 0, 3, 0.0, x, 0, 3);
         } catch (IllegalArgumentException e) {
@@ -123,9 +125,46 @@ public class SanityChecks {
         check("checking existence of dsyev...", true);
     }
 
+    /**
+     * Smoke test for the SVD bindings: decomposes a fixed 4 x 3 matrix and prints the factors.
+     * Nothing is compared numerically; the check is recorded as passed once the calls return.
+     */
+    public static void checkSVD() {
+        DoubleMatrix A = new DoubleMatrix(new double[][]{
+            {1.0, 2.0, 3.0},
+            {4.0, 5.0, 6.0},
+            {7.0, 8.0, 9.0},
+            {-1.0, -2.0, -3.0}
+        });
+
+        // Reduced decomposition: print U, S and V one after the other.
+        for (DoubleMatrix factor : org.jblas.Singular.sparseSVD(A)) {
+            System.out.println(factor.toString());
+        }
+
+        // Singular values only.
+        System.out.println(org.jblas.Singular.SVDValues(A));
+
+        // The complex variant of sparseSVD is not exercised by this check.
+        check("checking existence of dgesvd...", true);
+    }
+
+    /**
+     * Calls ComplexDoubleMatrix.dotu, whose result is a ComplexDouble, and prints that result.
+     * The value is only displayed for manual inspection; it is not passed to check().
+     */
+    public static void checkComplexReturnValues() {
+        double[] entries = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
+        ComplexDoubleMatrix A = new ComplexDoubleMatrix(entries);
+        ComplexDouble z = A.dotu(A);
+
+        System.out.print("Checking complex return values... ");
+        System.out.println("(z = " + z.toString() + ")");
+    }
+
     public static void main(String[] args) {
         Logger.getLogger().setLevel(Logger.CONFIG);
-        for (String arg: args) {
+        for (String arg : args) {
             if (arg.equals("--debug")) {
                 Logger.getLogger().setLevel(Logger.DEBUG);
             }
@@ -133,6 +172,8 @@ public class SanityChecks {
         checkVectorAddition();
         checkMatrixMultiplication();
         checkEigenvalues();
+        checkSVD();
+        checkComplexReturnValues();
         checkXerbla();
         printSummary();
     }
diff --git a/test/org/jblas/TestBlasDoubleComplex.java b/test/org/jblas/TestBlasDoubleComplex.java
index 75744f9..1708ee7 100644
--- a/test/org/jblas/TestBlasDoubleComplex.java
+++ b/test/org/jblas/TestBlasDoubleComplex.java
@@ -33,31 +33,40 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 // --- END LICENSE BLOCK ---
-
 package org.jblas;
 
 import junit.framework.TestCase;
 
-import org.jblas.ComplexDouble;
-
 public class TestBlasDoubleComplex extends TestCase {
 
-	public void testDotc() {
-		double[] a = new double[] { 1.0, 1.0, 2.0, 0.0, 3.0, 0.0 };
-		
-		ComplexDouble c = NativeBlas.zdotu(3, a, 0, 1, a, 0, 1);
-		System.out.println(c);
-	}
-
-	public void testAxpy() {
-		double[] x = new double[] { 0.0, -1.0 };
-		double[] y = new double[] { 0.0, 1.0 };
-		ComplexDouble a = new ComplexDouble(0.0, 1.0);
-		
-		NativeBlas.zdscal(1, 2.0, x, 0, 1);
-		assertEquals(new ComplexDouble(0.0, -2.0), new ComplexDouble(x[0], x[1]));
-		
-		NativeBlas.zaxpy(1, a, x, 0, 1, y, 0, 1);
-		assertEquals(new ComplexDouble(2.0, 1.0), new ComplexDouble(y[0], y[1]));
-	}
+    public void testZCOPY() {
+        // zcopy is asked for 3 complex elements, which must cover all six doubles of the array.
+        double[] source = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 };
+        double[] target = new double[source.length];
+        NativeBlas.zcopy(3, source, 0, 1, target, 0, 1);
+        for (int k = 0; k < target.length; k++) {
+            assertEquals((double)(k+1), target[k]);
+        }
+    }
+
+    public void testZDOTU() {
+        // Read as (re, im) pairs: (1+2i)^2 + (3+4i)^2 + (5+6i)^2 = -21 + 88i, without conjugation.
+        double[] a = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
+        ComplexDouble expected = new ComplexDouble(-21.0, 88.0);
+        assertEquals(expected, NativeBlas.zdotu(3, a, 0, 1, a, 0, 1));
+    }
+
+    public void testAxpy() {
+        double[] x = {0.0, -1.0};
+        double[] y = {0.0, 1.0};
+        ComplexDouble a = new ComplexDouble(0.0, 1.0);
+
+        // compute x = 2 * x
+        NativeBlas.zdscal(1, 2.0, x, 0, 1);
+        assertEquals(new ComplexDouble(0.0, -2.0), new ComplexDouble(x[0], x[1]));
+
+        // compute y = a * x + y with a = I, i.e. I * (-2I) + I = 2 + I
+        NativeBlas.zaxpy(1, a, x, 0, 1, y, 0, 1);
+        assertEquals(new ComplexDouble(2.0, 1.0), new ComplexDouble(y[0], y[1]));
+    }
 }
diff --git a/test/org/jblas/TestSolve.java b/test/org/jblas/TestSolve.java
index 1cd7f79..9521863 100644
--- a/test/org/jblas/TestSolve.java
+++ b/test/org/jblas/TestSolve.java
@@ -42,7 +42,12 @@ public class TestSolve extends TestCase {
 	public void testFaultySolveSymmetric() {
 		DoubleMatrix A = new DoubleMatrix(3, 3, 2d, 1d, 0d, 2d, 1d, 2d, 0d, 1d, 2d);
 		DoubleMatrix x = new DoubleMatrix(1, 3, 1d, 2d, 3d);
-		
-		Solve.solveSymmetric(A, x);
+		// x is 1 x 3 while A is 3 x 3, so the call is expected to be rejected (XERBLA reports LDB).
+		try {
+			Solve.solveSymmetric(A, x);
+			fail("Expected an IllegalArgumentException for the mis-shaped right-hand side");
+		} catch (IllegalArgumentException ex) {
+			assertEquals("XERBLA: Error on argument 8 (LDB) in DSYSV", ex.getMessage());
+		}
 	}
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jblas.git