[med-svn] [ipig] 01/01: Imported Upstream version 0.0.r5

Andreas Tille tille at debian.org
Sat Feb 8 21:15:52 UTC 2014


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository ipig.

commit cb38fe4998addbf645c9ee76239e75dac507fb54
Author: Andreas Tille <tille at debian.org>
Date:   Sat Feb 8 22:14:59 2014 +0100

    Imported Upstream version 0.0.r5
---
 .classpath                                         |   11 +
 .project                                           |   17 +
 .settings/org.eclipse.jdt.core.prefs               |   12 +
 downloader                                         |    7 +
 downloader.cmd                                     |    3 +
 gc.conf                                            |   17 +
 gcgui                                              |    7 +
 gcgui.cmd                                          |    3 +
 ipig                                               |    7 +
 ipig.cmd                                           |    3 +
 ipig.conf                                          |   26 +
 ipiggui                                            |    7 +
 ipiggui.cmd                                        |    3 +
 license.txt                                        |   23 +
 readme.txt                                         |  126 ++
 res/help/downloader_help.txt                       |   24 +
 res/help/genecontrolgui_help.txt                   |   22 +
 res/help/ipig_help.txt                             |   30 +
 res/help/ipiggui_help.txt                          |   29 +
 res/images/rki.png                                 |  Bin 0 -> 1033 bytes
 res/organisms                                      |   79 ++
 res/xsd/FuGElightv1.0.0.xsd                        | 1118 ++++++++++++++++
 res/xsd/mzIdentML1.0.0.xsd                         | 1339 ++++++++++++++++++++
 src/de/rki/ng4/ipig/GeneControl.java               |  397 ++++++
 src/de/rki/ng4/ipig/Ipig.java                      |  311 +++++
 src/de/rki/ng4/ipig/data/Exporter.java             |  446 +++++++
 src/de/rki/ng4/ipig/data/Gene.java                 |  603 +++++++++
 src/de/rki/ng4/ipig/data/GeneSet.java              |  674 ++++++++++
 src/de/rki/ng4/ipig/data/MzIdentML.java            |  327 +++++
 src/de/rki/ng4/ipig/data/Peptide.java              |  431 +++++++
 src/de/rki/ng4/ipig/data/PeptideSet.java           |  717 +++++++++++
 src/de/rki/ng4/ipig/exceptions/ExitException.java  |   21 +
 .../rki/ng4/ipig/exceptions/FormatException.java   |   21 +
 src/de/rki/ng4/ipig/gui/Downloader.java            |  988 +++++++++++++++
 src/de/rki/ng4/ipig/gui/GeneControlGui.java        |  542 ++++++++
 src/de/rki/ng4/ipig/gui/IpigGui.java               |  762 +++++++++++
 src/de/rki/ng4/ipig/gui/OutputJTextArea.java       |  111 ++
 src/de/rki/ng4/ipig/gui/ProxyDialog.java           |  159 +++
 src/de/rki/ng4/ipig/mapping/Mapper.java            |  272 ++++
 src/de/rki/ng4/ipig/mapping/Position.java          |  198 +++
 src/de/rki/ng4/ipig/mapping/Validator.java         |  107 ++
 src/de/rki/ng4/ipig/mapping/WuManber.java          |  177 +++
 src/de/rki/ng4/ipig/tools/Configurator.java        |  185 +++
 src/de/rki/ng4/ipig/tools/Info.java                |   85 ++
 src/de/rki/ng4/ipig/tools/Logger.java              |   55 +
 src/de/rki/ng4/ipig/tools/MascotCSV2TXT.java       |  165 +++
 src/de/rki/ng4/ipig/tools/Translator.java          |  220 ++++
 47 files changed, 10887 insertions(+)

diff --git a/.classpath b/.classpath
new file mode 100644
index 0000000..1ae45a9
--- /dev/null
+++ b/.classpath
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry excluding="**/.svn/*" kind="src" path="src"/>
+	<classpathentry excluding="**/.svn/*" kind="src" path="res"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+	<classpathentry kind="lib" path="lib/jdom.jar"/>
+	<classpathentry kind="lib" path="lib/xercesImpl.jar"/>
+	<classpathentry kind="lib" path="lib/commons-net-3.1.jar"/>
+	<classpathentry kind="lib" path="lib/commons-compress-1.4.1.jar"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/.project b/.project
new file mode 100644
index 0000000..a9b9edc
--- /dev/null
+++ b/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>ipig</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..2007b9d
--- /dev/null
+++ b/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,12 @@
+#Fri Jul 22 13:01:05 CEST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.6
diff --git a/downloader b/downloader
new file mode 100644
index 0000000..bab0dcb
--- /dev/null
+++ b/downloader
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SOURCE="${BASH_SOURCE[0]}"
+while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
+DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
+
+java -Xms1024m -Xmx1024m -jar $DIR/iPiG.jar -downloader
diff --git a/downloader.cmd b/downloader.cmd
new file mode 100644
index 0000000..235209c
--- /dev/null
+++ b/downloader.cmd
@@ -0,0 +1,3 @@
+@echo off
+
+java -Xms1024m -Xmx1024m -jar "%~dp0\iPiG.jar" -downloader
\ No newline at end of file
diff --git a/gc.conf b/gc.conf
new file mode 100644
index 0000000..077582b
--- /dev/null
+++ b/gc.conf
@@ -0,0 +1,17 @@
+# GeneControl configuration example
+# this config file is loaded by default
+# fit it to your needs or indicate/load a different config file per command-line execution resp. in the graphical user interface.
+
+# indicate needed files
+# 1. Gene Annotations
+# 2. Amino Acid Sequences
+# 3. Path for the output files (optional)
+# 4. Path to reference chromosomes
+FgeneAnnoFile	=	M:/Downloads/Data/human/knownGene.txt
+FgeneAaSeqFile	=	M:/Downloads/Data/human/knownGenePep.txt
+FoutputPath		=	M:/Downloads/Data/human/
+refSeqPath		=	M:/Downloads/Data/human/
+
+# define the desired minimum similarity between reference amino acid sequences and self translated sequences
+# comparisons of sequences with different lengths will result in a similarity of zero
+minSimilarity	=	0.95
\ No newline at end of file
diff --git a/gcgui b/gcgui
new file mode 100644
index 0000000..5617396
--- /dev/null
+++ b/gcgui
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SOURCE="${BASH_SOURCE[0]}"
+while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
+DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
+
+java -Xms1024m -Xmx1024m -jar $DIR/iPiG.jar -controlgui
diff --git a/gcgui.cmd b/gcgui.cmd
new file mode 100644
index 0000000..6fe49d2
--- /dev/null
+++ b/gcgui.cmd
@@ -0,0 +1,3 @@
+@echo off
+
+java -Xms1024m -Xmx1024m -jar "%~dp0\iPiG.jar" -controlgui
\ No newline at end of file
diff --git a/ipig b/ipig
new file mode 100644
index 0000000..4f31881
--- /dev/null
+++ b/ipig
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SOURCE="${BASH_SOURCE[0]}"
+while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
+DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
+
+java -Xms1024m -Xmx1024m -jar $DIR/iPiG.jar $1 $2 $3 $4 $5
diff --git a/ipig.cmd b/ipig.cmd
new file mode 100644
index 0000000..d2a6a5b
--- /dev/null
+++ b/ipig.cmd
@@ -0,0 +1,3 @@
+@echo off
+
+java -Xms1024m -Xmx1024m -jar "%~dp0\iPiG.jar" %1 %2 %3 %4 %5
\ No newline at end of file
diff --git a/ipig.conf b/ipig.conf
new file mode 100644
index 0000000..048323c
--- /dev/null
+++ b/ipig.conf
@@ -0,0 +1,26 @@
+# iPiG configuration example
+# this config file is loaded by default
+# fit it to your needs or indicate/load a different config file per command-line execution resp. in the graphical user interface.
+
+# indicate needed files
+# 1. Gene Annotations
+# 2. Amino Acid Sequences
+# 3. UniProt ID-mapping tab file
+# 4. Proteome fasta
+geneAnnoFile	=	M:/Downloads/Data/human/knownGene_good.txt
+geneAaSeqFile	=	M:/Downloads/Data/human/knownGenePep.txt
+protMapFile		=	M:/Downloads/Data/human/HUMAN_9606_idmapping_selected.tab
+protSeqFile		=	M:/Downloads/Data/human/HUMAN.fasta
+
+# define score boundaries for colors/suffixes in bed/gff3 output.
+# the interval [minScore,maxScore] (int) is divided into 3 parts represented by the 3 colors for bed or type suffixes in gff3.
+# the separation positions can be defined with threshold 1 and 2 (int or double), which have to be located in the interval.
+minScore	=	0
+maxScore	=	150
+threshold1	=	50
+threshold2	=	100
+
+# for bed output, define three rgb values (comma separated) to represent the 3 parts
+color1	=	191,191,191
+color2 	=	255,159,0
+color3 	=	0,255,0
\ No newline at end of file
diff --git a/ipiggui b/ipiggui
new file mode 100644
index 0000000..b572aec
--- /dev/null
+++ b/ipiggui
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SOURCE="${BASH_SOURCE[0]}"
+while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
+DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
+
+java -Xms1024m -Xmx1024m -jar $DIR/iPiG.jar -gui
diff --git a/ipiggui.cmd b/ipiggui.cmd
new file mode 100644
index 0000000..d9a96e0
--- /dev/null
+++ b/ipiggui.cmd
@@ -0,0 +1,3 @@
+@echo off
+
+java -Xms1024m -Xmx1024m -jar "%~dp0\iPiG.jar" -gui
\ No newline at end of file
diff --git a/license.txt b/license.txt
new file mode 100644
index 0000000..5dc0642
--- /dev/null
+++ b/license.txt
@@ -0,0 +1,23 @@
+Copyright (c) 2012, 
+Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institute, Germany, 
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, 
+are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * The name of the author may not be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL Mathias Kuhring BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..2a7ae9e
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,126 @@
+iPiG - Integrating Peptide spectrum matches Into Genome browser visualizations
+
+
+Introduction
+------------
+
+iPiG targets the integration of peptide spectrum matches (PSMs) from mass spectrometry (MS) 
+peptide identifications into the genomic visualizations provided by genome browser 
+such as the UCSC genome browser (http://genome.ucsc.edu/).
+
+iPiG takes PSMs from the MS standard format mzIdentML (*.mzid) and provides results in
+genome track formats (BED and GFF3 files), which can be easily imported into genome browsers.
+
+For more details about iPiG and its functioning, please see
+
+	"iPiG: Integrating Peptide Spectrum Matches Into Genome Browser Visualizations"
+	Mathias Kuhring and Bernhard Y. Renard
+	(submitted manuscript)
+
+PLEASE NOTE, it is recommended to read the paper and this readme.txt file first, 
+followed by the wiki pages provided at the project webpage: https://sourceforge.net/projects/ipig/
+
+	
+Instructions
+------------
+	
+iPiG comes along with two additional tools: besides the mapping procedure itself (iPiG), there are 
+an optional gene quality control (GeneControl) and a download tool helping to get the necessary data (Downloader).
+
+An example of using iPiG, GeneControl and Downloader is provided in the wiki.
+
+GeneControl checks the integrity of the required gene annotations and the consistency between
+the annotations and the corresponding amino acids sequences.
+Thus, it is recommended to run it once for the gene annotations used.
+
+
+System Requirements
+-------------------
+
+iPiG is developed in Java version 6, thus it is platform independent,
+but requires a Java Runtime Environment (JRE) in version 6.
+Version 7 is not fully tested.
+
+The JRE can be downloaded at the Oracle webpage:
+http://www.oracle.com/technetwork/java/javase/downloads/index.html
+
+
+User Interfaces
+---------------
+
+iPiG provides a command line interface (CLI) as well as a graphical user interface (GUI).
+
+Execution scripts are provided for Windows and Linux. Type "ipig" without any 
+parameters to get a short help on parameter usage.
+
+On Linux platforms, it might be necessary to set execution rights for the scripts.
+This can be done with "chmod +x ipig", "chmod +x ipiggui", "chmod +x gcgui",  resp. "chmod +x downloader".
+
+You might add the iPiG directory to your PATH variable to use it directly in your data folders for instance.
+Please consult your operating system's manual on how to set the PATH variable.
+
+In general, all interfaces (including GeneControl) can be addressed by executing the iPiG 
+script file, which is recommended over executing the iPiG.jar file with Java itself.
+
+The GUIs of iPiG, GeneControl and Downloader should be started with the additional scripts,
+like "ipiggui", "gcgui" or "downloader", though no further parameters are passed by the gui scripts yet.
+
+
+Data Requirements (iPiG)
+------------------------
+
+iPiG requires information about gene locations and protein-gene connections. Thus several files have to be provided. 
+See the wiki for recommended sources and how to indicate them.
+
+1.) A file with peptide spectrum matches (PSMs), best in mzIdentML format.
+    Alternatively, a tab separated text file with particular columns (see ipig.pdf).
+	
+2.) The annotations of a reference genome in UCSC table format (*.txt).
+
+3.) A file containing the corresponding amino acid translations in UCSC table format (*.txt).
+	
+4.) Optional but highly recommended: A Uniprot ID-mapping file (tab-delimited, *.tab).
+	Source: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/
+
+5.) Optional: A proteome in fasta format, e.g. containing the proteins used for peptide identification (*.fasta).
+
+For more details about the formats and for examples, 
+please have a look in the wiki and at the EXAMPLES folder.
+
+
+Data Requirements (GeneControl)
+-------------------------------
+
+1.) The annotations of a reference genome in UCSC table format (*.txt).
+
+2.) A file, containing the corresponding amino acid translations in UCSC table format (*.txt).
+
+3.) Reference sequences of the chromosomes in fasta format, indicated by a path.
+	Chromosomes must be one per file each and files must be named
+    like the chromosomes in the annotations (e.g. chr11.fa, chrY.fa, chrIV.fa etc.).
+
+
+-----------------------------------------------------------------------------------------
+Copyright (c) 2012, 
+Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institute, Germany, 
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, 
+are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * The name of the author may not be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL Mathias Kuhring BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/res/help/downloader_help.txt b/res/help/downloader_help.txt
new file mode 100644
index 0000000..4107092
--- /dev/null
+++ b/res/help/downloader_help.txt
@@ -0,0 +1,24 @@
+Downloader GUI quick help
+
+1.)
+Choose the organism depending on the source of the PSMs.
+
+2.)
+Choose the data to download for the selected organism.
+To run iPiG the gene annotations, amino acid sequences 
+and the id mappings are necessary. The proteome can 
+sometimes provide additional mapping information.
+To run GeneControl the gene annotations, 
+amino acid sequences and the genome are necessary.
+
+3.)
+Choose a path where the data can be saved/extracted.
+
+4.)
+Select if the downloads should be extracted.
+
+5.)
+If necessary, set up a proxy which is open for ftp data.
+
+6.)
+Start the downloads.
\ No newline at end of file
diff --git a/res/help/genecontrolgui_help.txt b/res/help/genecontrolgui_help.txt
new file mode 100644
index 0000000..ade48df
--- /dev/null
+++ b/res/help/genecontrolgui_help.txt
@@ -0,0 +1,22 @@
+GeneControl GUI quick help
+(for further help please refer to the readme.txt)
+
+1.) Indicate the annotations of a reference genome 
+    in UCSC table format.
+
+2.) Indicate the corresponding amino acid translations 
+    in UCSC table format (*.txt).
+
+3.) Indicate the path to reference chromosomes in 
+    fasta format. Chromosomes must be one per file each 
+    and files must be named like the chromosomes in the 
+    annotations (e.g. chr11.fa, chrY.fa, chrIV.fa etc.).
+    
+4.) Optionally, indicate a path for the output 
+    files (execution path by default).
+    
+5.) Define the desired minimum similarity between 
+    reference amino acid sequences and 
+    self translated sequences
+    
+6.) Run the control by pressing the Start-button.
\ No newline at end of file
diff --git a/res/help/ipig_help.txt b/res/help/ipig_help.txt
new file mode 100644
index 0000000..9d3b45f
--- /dev/null
+++ b/res/help/ipig_help.txt
@@ -0,0 +1,30 @@
+iPiG - Integrating PSMs Into Genome browser visualizations
+
+command:
+ ipig <psm file>|-g|-c|-cg|-d [<config file>]
+
+parameters:
+ <psm file>         indicates the file with the 
+                    peptide spectrum matches (mzid/txt)	
+ 
+ -g, -gui           starts the graphical user interface of iPiG
+ 
+ -c, -control       starts the gene control, necessary files have 
+                    to be indicated in the configuration file
+ 
+ -cg, -controlgui   starts the graphical user interface of the gene control
+ 
+ -d, -downloader    starts the download gui
+ 
+ <config file>      a different configuration file can be indicated 
+                    (otherwise ipig.conf is loaded by default)
+	
+additional requirements:
+ using a non-gui mode, a config file (ipig.conf by default) has to contain 
+ several additional parameters, e.g. indicating the reference genome etc.
+ 
+ in a gui mode (-g and -cg), additional parameters can be indicated in two ways, 
+ within the interface or with a config file as well.
+ 
+ have a look into readme.txt and ipig.conf for examples and 
+ more details about the additional parameters
\ No newline at end of file
diff --git a/res/help/ipiggui_help.txt b/res/help/ipiggui_help.txt
new file mode 100644
index 0000000..0ed1806
--- /dev/null
+++ b/res/help/ipiggui_help.txt
@@ -0,0 +1,29 @@
+iPiG GUI quick help
+(for further help please refer to the readme.txt)
+
+1.) Indicate a file with peptide spectrum matches 
+    (*.mzid or *.txt).
+	
+2.) Optionally, indicate a path for the output 
+    files (execution path by default).
+	
+3.) Indicate annotations of a reference genome in 
+    UCSC table format (*.txt).
+
+4.) Indicate the corresponding amino acid 
+    translations in UCSC table format (*.txt).
+	
+5.) Optional but highly recommended, indicate a 
+    Uniprot ID-mapping file (*.tab).
+
+6.) Optionally, indicate a proteome in fasta 
+    format (*.fasta).
+
+7.) For the outputs give the score range of the 
+    PSMs and define thresholds to separate PSMs 
+    in three scoring groups.
+
+8.) For the BED output give three RGB colors that 
+    indicate the scoring group of each PSM.
+
+9.) Run the mapping by pressing the Start-button.
\ No newline at end of file
diff --git a/res/images/rki.png b/res/images/rki.png
new file mode 100644
index 0000000..c47c4e5
Binary files /dev/null and b/res/images/rki.png differ
diff --git a/res/organisms b/res/organisms
new file mode 100644
index 0000000..474f39f
--- /dev/null
+++ b/res/organisms
@@ -0,0 +1,79 @@
+org=Caenorhabditis elegans
+genes=hgdownload.cse.ucsc.edu/goldenPath/ce10/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/ce10/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/CAEEL_6239_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/CAEEL.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/ce10/bigZips/chromFa.tar.gz
+
+org=Danio rerio (Zebrafish)
+genes=hgdownload.cse.ucsc.edu/goldenPath/danRer7/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/danRer7/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/DANRE_7955_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/DANRE.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr1.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr2.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr3.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr4.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr5.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr6.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr7.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr8.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr9.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr10.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr11.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr12.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr13.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr14.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr16.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr17.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr18.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr19.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr20.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr21.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr22.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr23.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr24.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chr25.fa.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/danRer6/chromosomes/chrM.fa.gz
+
+org=Drosophila melanogaster (Fruit fly)
+genes=hgdownload.cse.ucsc.edu/goldenPath/dm3/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/dm3/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/DROME_7227_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/DROME.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/dm3/bigZips/chromFa.tar.gz
+
+org=Gallus gallus (Chicken)
+genes=hgdownload.cse.ucsc.edu/goldenPath/galGal3/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/galGal3/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/CHICK_9031_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/CHICK.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/galGal3/bigZips/chromFa.tar.gz
+
+org=Homo sapiens (Human)
+genes=hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/hg19/database/knownGenePep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/HUMAN.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz
+
+org=Mus musculus (Mouse)
+genes=hgdownload.cse.ucsc.edu/goldenPath/mm10/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/mm10/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/MOUSE_10090_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/MOUSE.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/mm10/bigZips/chromFa.tar.gz
+
+org=Rattus norvegicus (Rat)
+genes=hgdownload.cse.ucsc.edu/goldenPath/rn4/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/rn4/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/RAT_10116_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/RAT.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/rn4/bigZips/chromFa.tar.gz
+
+org=Saccharomyces cerevisiae (Yeast)
+genes=hgdownload.cse.ucsc.edu/goldenPath/sacCer3/database/ensGene.txt.gz
+aaseqs=hgdownload.cse.ucsc.edu/goldenPath/sacCer3/database/ensPep.txt.gz
+idmappings=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/YEAST_559292_idmapping_selected.tab.gz
+prots=ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/proteomes/YEAST.fasta.gz
+chroms=hgdownload.cse.ucsc.edu/goldenPath/sacCer3/bigZips/chromFa.tar.gz
\ No newline at end of file
diff --git a/res/xsd/FuGElightv1.0.0.xsd b/res/xsd/FuGElightv1.0.0.xsd
new file mode 100644
index 0000000..fa84ae2
--- /dev/null
+++ b/res/xsd/FuGElightv1.0.0.xsd
@@ -0,0 +1,1118 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- FuGELight schema, generated by manually editing the official FuGE v1 schema 
+	Distributed under the Creative Commons license http://creativecommons.org/licenses/by/2.0/.
+-->
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+            elementFormDefault="qualified"
+            version="1.0.0">
+    
+	<xsd:complexType name="FuGE.Bio.ConceptualMolecule.ConceptualMoleculeType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> An abstract class for describing database entries of biological molecules such as DNA and protein sequences, metabolites or lipids etc. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType"/>			
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Data.DataType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Data can be an input to or an output from a ProtocolApplication. Data may be produced from a Material (data acquisition) or from another Data object (data transformation).  Examples of Data are gene expression measurements, or phenotypes associated with genetic manipulations. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType"/>			
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Data.ExternalDataType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> Data external to the XML instance document. The location of the data file is given in the location attribute. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.DataType">
+				<xsd:sequence>
+					<xsd:element name="externalFormatDocumentation" minOccurs="0" type="xsd:anyURI">
+						<xsd:annotation>
+							<xsd:documentation> A URI to access documentation and tools to interpret the external format of the ExternalData instance. For example, XML Schema or static libraries (APIs) to access binary formats. 
+         					 </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="fileFormat" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The format of the ExternalData file, for example "tiff" for image files.</xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="cvParam"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="location" type="xsd:anyURI" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The location of the data file. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Data.InternalDataType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Data that is internal to an instance XML document.   InternalData can be extended with an element that defines a particular encoding or data type for the storage array or the subclass GenericInternalData should be instantiated.   The array will typically use pointer arithmetic to access values based on the rank (e.g. number of)  Dimensions and their respective sizes (e.g. the number of contained DimensionElements). </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.DataType"/>			
+		</xsd:complexContent>
+	</xsd:complexType>	
+	<xsd:complexType name="FuGE.Bio.Material.MaterialType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Material represents any kind of substance used in an experimental workflow, such as whole organisms, cells, DNA, solutions, compounds and experimental substances (gels, arrays etc.). The Material class can be extended by adding subclasses to model domain specific properties, or the relationships to OntologyIndividual can be used to describe the characteristics and type of Material.   Materials can be related to other materials through a directed acyclic graph (repr [...]
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="ContactRole" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Contact details for the Material. The association to ContactRole could specify, for example, the creator or provider of the Material. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+                    <xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded">
+                        <xsd:annotation>
+                            <xsd:documentation> The characteristics of a Material. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:group>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Material.MaterialMeasurementType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> MaterialMeasurement represents the quantity of a source Material used in a ProtocolApplication. MaterialMeasurement is abstract and should be extended if a subclass of ProtocolApplication has to be associated with a measured source of a subclass of Material. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+            <xsd:group ref="MeasurementGroup" minOccurs="0">
+                <xsd:annotation>
+                    <xsd:documentation> The value of the measured source of material. </xsd:documentation>
+                </xsd:annotation>
+            </xsd:group>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Material.GenericMaterialType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of the abstract Material class, which should be used in conjunction with controlled vocabulary terms to describe Materials of any types used in an investigation. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Material.MaterialType">
+				<xsd:sequence>
+					<xsd:element name="components" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Association from a GenericMaterial to other GenericMaterials that are sub-components (such as wells within an array plate). If a subcomponent undergoes a ProtocolApplication, then the containing GenericMaterial must also be an input to the ProtocolApplication and be output as a new GenericMaterial or version of the GenericMaterial. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericMaterial_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Bio.Material.GenericMaterialMeasurementType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of MaterialMeasurement to be used without being extended in conjunction with GenericProtocolApplication and GenericMaterial to model measured sources of materials. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Material.MaterialMeasurementType">
+				<xsd:attribute name="Material_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The instance of GenericMaterial or a subclass of Material that is input to the GenericProtocolApplication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Audit.PersonType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A person for which the attributes are self describing. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Audit.ContactType">
+				<xsd:sequence>
+					<xsd:element name="affiliations" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The organization a person belongs to. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Organization_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="lastName" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The Person's last/family name. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="firstName" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The Person's first name. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="midInitials" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The Person's middle initial. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Audit.OrganizationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> Organizations are entities like companies, universities, government agencies for which the attributes are self describing. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Audit.ContactType">
+				<xsd:sequence>
+					<xsd:element name="parent" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The containing organization (the university or business which a lab belongs to, etc.) </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Organization_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Audit.ContactType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> A contact is either a person or an organization. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:attribute name="address" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The address of the Contact. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="phone" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The telephone number of the Contact including the suitable area codes. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="email" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The email address of the Contact. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="fax" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The fax number of the Contact. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="tollFreePhone" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> A toll free phone number for the Contact, including suitable area codes. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Audit.ContactRoleType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The role that a Contact plays in an organization or with respect to the associating class.  A Contact may have several Roles within scope, and as such, associations to ContactRole allow the use of a Contact in a certain manner.  Examples might include a provider, or a data analyst. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element name="role">
+				<xsd:annotation>
+					<xsd:documentation> The roles (lab equipment sales, contractor, etc.) the Contact fills. </xsd:documentation>
+				</xsd:annotation>
+				<xsd:complexType>
+					<xsd:sequence>
+						<xsd:element ref="cvParam"/>
+					</xsd:sequence>
+				</xsd:complexType>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="Contact_ref" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation> When a ContactRole is used, it specifies which Contact the role is associated with. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.References.BibliographicReferenceType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> Represents bibliographic references, including the most common attributes. Note that because a BibliographicReference is Identifiable, a DatabaseEntry can also be specified. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:attribute name="authors" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The names of the authors of the reference. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="publication" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The name of the journal, book etc. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="publisher" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The publisher of the publication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="editor" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The editor(s) of the reference. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="year" type="xsd:int">
+					<xsd:annotation>
+						<xsd:documentation> The year of publication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="volume" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The volume name or number. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="issue" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The issue name or number. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="pages" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The page numbers. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="title" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The title of the BibliographicReference. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.References.DatabaseType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> An address to a repository. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="ContactRole" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Contacts associated with this database, such as hosting institution, database curators, manager etc. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="version" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The version of the Database. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="URI" type="xsd:anyURI">
+					<xsd:annotation>
+						<xsd:documentation> The location of the Database. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.References.DatabaseReferenceType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A reference to a record in a database. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="accession" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation> The identifier used to look up the record. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="accessionVersion" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation> The appropriate version of the accession (if applicable). </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="Database_ref" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation> Reference to the database where the DatabaseEntry instance can be found. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Description.DescriptionType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A free text description of an object. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="text" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation> The description. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ProtocolType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> A Protocol is a parameterizable description of a method.  ProtocolApplication is used to specify the ParameterValues of its Protocol's Parameters.   Protocol should be extended in data formats. For cases where no extension is developed, the subclass of Protocol, GenericProtocol, should be used to capture experimental protocols. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType"/>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ParameterType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> A Parameter is a replaceable value in a Parameterizable class, and uses the Measurement class for giving a specific type of value.  Examples of Parameters include: scanning wavelength, laser power, centrifuge speed, multiplicative errors, the number of input nodes to a SOM, and PCR temperatures.   Parameter is abstract and should be extended by subclassing. The GenericParameter class offers the functionality of a parameter defined by a controlled vocabulary term. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+                    <xsd:group ref="MeasurementGroup" minOccurs="0">
+                        <xsd:annotation>
+                            <xsd:documentation> The default value for this parameter. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:group>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ParameterValueType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The runtime value of a Parameter. </xsd:documentation>
+		</xsd:annotation>
+				<xsd:sequence>
+					<xsd:element name="value" type="xsd:string">
+						<xsd:annotation>
+							<xsd:documentation> The runtime value assigned to the referenced Parameter. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="Parameter_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The parameter this value is for. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ProtocolApplicationType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> The use of a protocol with the requisite Parameters and ParameterValues. ProtocolApplications can take Material or Data (or both) as input and produce Material or Data (or both) as output. ProtocolApplication is abstract and should be subclassed in the development of modular formats. The subclass GenericProtocolApplication can be used without extension. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:attribute name="activityDate" type="xsd:dateTime">
+					<xsd:annotation>
+						<xsd:documentation> When the protocol was applied. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.SoftwareType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Software used in the Protocol.  Examples of Software include: feature extraction software, clustering software, etc...   Software is abstract and should either be extended by subclassing or the GenericSoftware class, a functional version of Software, should be used without extension. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="ContactRole" minOccurs="0"/>
+				</xsd:sequence>
+				<xsd:attribute name="version" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The version of Software used. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.EquipmentType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> The equipment (hardware) used in the Protocol. Examples include: computers, scanners, wash stations etc...   Equipment is abstract and should either be extended by subclassing or the GenericEquipment class, a functional version of Equipment, should be used. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+                    <xsd:element name="make" type="ParamType" minOccurs="0">
+                        <xsd:annotation>
+                            <xsd:documentation> The equipment make, usually the organization that makes the equipment. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:element>
+                    <xsd:element name="model" type="ParamType" minOccurs="0">
+                        <xsd:annotation>
+                            <xsd:documentation> The Equipment model. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.EquipmentApplicationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The use of a piece of Equipment with the requisite Parameters and ParameterValues. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ParameterizableApplicationType">
+				<xsd:attribute name="serialNumber" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> Manufacturer's identifier for the Equipment. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="Equipment_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The Equipment that was used in the ProtocolApplication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>				
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.SoftwareApplicationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The use of a piece of software with the requisite Parameters and ParameterValues. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ParameterizableApplicationType">
+				<xsd:attribute name="Software_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The Software that was used in the ProtocolApplication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ParameterizableApplicationType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> The interface that is the use of a Parameterizable class. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="ParameterValue" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameter values for this Parameterizable Application. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ActionType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> An Action is one step of a Protocol. Sets of ordered Actions define the Protocol. Action is abstract and can be extended to specify particular types of steps within a subclass of Protocol. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:attribute name="actionOrdinal" type="xsd:int">
+					<xsd:annotation>
+						<xsd:documentation> The order by which this Action should occur with respect to the Parent protocol. Note that two Actions with the same actionOrdinal are performed in parallel or the order is not important. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ActionApplicationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> ActionApplication mirrors the structure of Actions within Protocols and allows ActionDeviations to be reported.   RULE:   If an Action references a childProtocol, an ActionApplication must be created and reference a childProtocolApplication of the corresponding type (or GenericProtocolApplication) to mirror the Protocol structure.   ActionApplications can be (but need not be) created for simple Actions. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ParameterizableApplicationType">
+				<xsd:sequence>
+					<xsd:element name="actionDeviation" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> Used to define human readable notes for annotating deviations to an Action during the ActionApplication. 
+          </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="Description"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="Action_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> Association to the Action that is being performed as recorded by this ActionApplication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="ProtocolApplication_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> Association to ProtocolApplication if a hierarchical Protocol has been defined representing the child step of the Protocol. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericProtocolType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> GenericProtocol should be used with GenericAction, GenericParameter or protocolText to describe protocols in a data format for cases where no explicit extension of Protocol has been developed. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolType">
+				<xsd:sequence>
+					<xsd:element name="protocolText" type="xsd:string" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The text that applies to the entire Protocol. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="software" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> A reference to the GenericSoftware used with this GenericProtocol. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericSoftware_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="equipment" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> A reference to the GenericEquipment used with this GenericProtocol. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericEquipment_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element ref="GenericParameter" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameters defined for the GenericProtocol. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="GenericAction" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The actions performed within a GenericProtocol. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericActionType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A GenericAction represents a step within a GenericProtocol. It allows a reference to a sub-GenericProtocol, user entered text to describe the GenericAction or a term from a controlled vocabulary to be given. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ActionType">
+				<xsd:sequence>
+					<xsd:element name="actionTerm" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> Each GenericAction may be associated with an OntologyTerm that defines that GenericAction. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="cvParam"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element ref="GenericParameter" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameters belonging to the GenericAction. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="ParameterPair" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> ParameterPairs owned by the GenericAction. The TargetParameter should reference a Parameter owned by a child Protocol which is also referenced by the GenericAction. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="actionText" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> Free text to describe this Action. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="Protocol_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> This is an instantiation of the ChildProtocol association for associating complex, hierarchical protocols together. A GenericAction can reference substeps within the parent GenericProtocol. The substeps can be further instances of GenericProtocol, or defined subclasses of Protocol. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericSoftwareType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of the abstract Software class for capturing the description of Software used. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.SoftwareType">
+				<xsd:sequence>
+					<xsd:element name="equipment" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Associates GenericEquipment with GenericSoftware to indicate that instances of software are run on piece(s) of Equipment. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericEquipment_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element ref="GenericParameter" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameters for this piece of GenericSoftware. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericEquipmentType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of the abstract Equipment class for capturing the description of Equipment used. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.EquipmentType">
+				<xsd:sequence>
+					<xsd:element name="software" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Associates GenericEquipment with GenericSoftware to indicate that instances of software are run on piece(s) of Equipment. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericSoftware_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element ref="GenericParameter" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameters for this piece of GenericEquipment. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="equipmentParts" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> An association between a parent GenericEquipment and its parts. If a GenericProtocol utilises GenericEquipment and its parts, both the parent and child GenericEquipment instances should be referenced. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="GenericEquipment_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericParameterType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of the abstract Parameter class to represent a parameter that is defined by a controlled vocabulary term. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ParameterType">
+				<xsd:sequence>
+					<xsd:element name="parameterType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The name of the parameter. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="cvParam"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.GenericProtocolApplicationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A subclass of ProtocolApplication to be used as it is in data formats without being extended. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolApplicationType">
+				<xsd:sequence>
+					<xsd:element ref="GenericMaterialMeasurement" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Measured sources of material that are inputs to this GenericProtocolApplication. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="inputData" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The instances of GenericData that are inputs to the GenericProtocolApplication. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Data_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="outputData" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The Data produced from the GenericProtocolApplication. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Data_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="outputMaterials" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The instances of GenericMaterial or a subclass of Material that are produced from the GenericProtocolApplication. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Material_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="inputCompleteMaterials" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Whole Materials that are inputs to the GenericProtocolApplication. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Material_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element ref="ParameterValue" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The parameter values for this Parameterizable Application. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>					
+				</xsd:sequence>
+				<xsd:attribute name="Protocol_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the GenericProtocol or subclass of Protocol that has been used with this GenericProtocolApplication. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Protocol.ParameterPairType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A pairing of an output parameter from a Protocol (SourceParameter) with an input parameter to a separate Protocol (TargetParameter) to indicate that the ParameterValue will be the same. </xsd:documentation>
+		</xsd:annotation>
+				<xsd:sequence>
+					<xsd:element name="targetParameter">
+						<xsd:annotation>
+							<xsd:documentation> A Parameter that is an input to a process which has been an output from another process. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Parameter_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="sourceParameter">
+						<xsd:annotation>
+							<xsd:documentation> A Parameter that is output from one process which will be input to another process. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:attribute name="Parameter_ref" type="xsd:string" use="required"/>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Ontology.ParamType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation>Abstract entity allowing either cvParam or userParam to be referenced in other schemas.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="name" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The name of the parameter.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="value" type="xsd:string" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>The user-entered value of the parameter.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitAccession" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>An accession number identifying the unit within the OBO foundry Unit CV.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitName" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>The name of the unit.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitCvRef" type="xsd:string" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>If a unit term is referenced, this attribute must refer to the CV 'id' attribute defined in the cvList in this file.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Ontology.userParamType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation>A single user-defined parameter.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Ontology.ParamType"/>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Ontology.cvParamType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A single entry from an ontology or a controlled vocabulary.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Ontology.ParamType">
+				<xsd:attribute name="cvRef" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation>A reference to the cv element from which this term originates.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="accession" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation>The accession or ID number of this CV term in the source CV.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Ontology.PropertyValue" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A single value and unit combination.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="value" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>A user-entered value for the parameter.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitAccession" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>The accession number of the unit term in the source unit CV.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitName" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>The name of the unit.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="unitCvRef" type="xsd:string" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>If a unit term is referenced, this attribute must refer to the CV 'id' attribute defined in the cvList in this file.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Ontology.cvType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation>A source controlled vocabulary from which cvParams will be obtained.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="fullName" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The full name of the CV.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="version" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>The version of the CV.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="URI" type="xsd:anyURI" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The URI of the source CV.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="id" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The unique identifier of this cv within the document to be referenced by cvParam elements.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.IdentifiableType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Other classes in the model can be specified as sub-classes, inheriting from Identifiable. Identifiable gives classes a unique identifier within the scope and a name that need not be unique. Identifiable also provides a mechanism for annotating objects with BibliographicReference(s) and DatabaseEntry(s). </xsd:documentation>
+		</xsd:annotation>
+				<xsd:attribute name="id" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> An identifier is an unambiguous string that is unique within the scope (i.e. a document, a set of related documents, or a repository) of its use. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="name" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The potentially ambiguous common identifier, such as a human-readable name for the instance. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Measurement.AtomicValueType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> An atomic value i.e. one that has a single value. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Measurement.MeasurementType">
+				<xsd:sequence>
+					<xsd:element ref="PropertyValue"/>
+				</xsd:sequence>
+			</xsd:extension>			
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Measurement.BooleanValueType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A Boolean value. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Measurement.MeasurementType">
+				<xsd:attribute name="value" type="xsd:boolean" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A Boolean default value for the parameter. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Measurement.ComplexValueType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A complex default value for the Parameter, such as a term from a controlled list or a function. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Measurement.MeasurementType">
+				<xsd:sequence>
+					<!--<xsd:element ref="Param" maxOccurs="unbounded"/>-->
+                    <xsd:group ref="ParamGroup" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Measurement.RangeType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A range value. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Measurement.MeasurementType">
+				<xsd:sequence>
+					<xsd:element name="lowerLimit">
+						<xsd:annotation>
+							<xsd:documentation> The lower limit of a range value. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="PropertyValue"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="upperLimit">
+						<xsd:annotation>
+							<xsd:documentation> The upper limit of a range value. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="PropertyValue"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+					<xsd:element name="rangeDescriptors" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Ontology terms can be used to define the semantics of the lower and upper limit in the range, for example the inclusivity of the values or what the values correspond to in the annotated object. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="cvParam"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Common.Measurement.MeasurementType" abstract="true">
+		<xsd:annotation>
+			<xsd:documentation> Abstract superclass representing different methods of supplying a value or measurement. </xsd:documentation>
+		</xsd:annotation>
+				<xsd:sequence>
+                    <!--
+					<xsd:element name="dataType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The data type of the Parameter such as Boolean, integer, String. </xsd:documentation>
+						</xsd:annotation>
+						<xsd:complexType>
+							<xsd:sequence>
+								<xsd:element ref="cvParam"/>
+							</xsd:sequence>
+						</xsd:complexType>
+					</xsd:element>
+					-->
+                    <xsd:element name="dataType" type="FuGE.Common.Ontology.cvParamType" minOccurs="0">
+                        <xsd:annotation>
+                            <xsd:documentation> The data type of the Parameter such as Boolean, integer, String. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:element>
+				</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Collection.ReferenceableCollectionType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The collection of objects that allow external references. </xsd:documentation>
+		</xsd:annotation>
+				<xsd:sequence>
+					<xsd:element ref="BibliographicReference" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Reference to the complete set of BibliographicReference objects in the FuGE document. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="Database" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> Reference to the complete set of Database objects in the FuGE document. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Collection.AuditCollectionType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The collection of Contact records. </xsd:documentation>
+		</xsd:annotation>
+				<xsd:sequence>
+                    <!--
+					<xsd:element ref="Contact" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> The complete set of Contacts. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					-->
+                    <xsd:group ref="ContactGroup" minOccurs="0" maxOccurs="unbounded">
+                        <xsd:annotation>
+                            <xsd:documentation> The complete set of Contacts. </xsd:documentation>
+                        </xsd:annotation>
+                    </xsd:group>
+				</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FuGE.Collection.ProviderType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The provider of the document in terms of the Contact and the software that produced the document instance. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="ContactRole" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The Contact that provided the document instance. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="Software_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> The Software that produced the document instance. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	
+	<xsd:complexType name="ParamType">
+        <xsd:group ref="ParamGroup"/>
+	</xsd:complexType>
+    <xsd:complexType name="ParamListType">
+        <xsd:group ref="ParamGroup" maxOccurs="unbounded"/>
+    </xsd:complexType>
+    <xsd:group name="ParamGroup">
+        <xsd:choice>
+            <xsd:element name="cvParam" type="FuGE.Common.Ontology.cvParamType"/>
+            <xsd:element name="userParam" type="FuGE.Common.Ontology.userParamType"/>
+        </xsd:choice>
+    </xsd:group>
+    <xsd:group name="MeasurementGroup">
+        <xsd:choice>
+            <xsd:element name="AtomicValue" type="FuGE.Common.Measurement.AtomicValueType"/>
+            <xsd:element name="BooleanValue" type="FuGE.Common.Measurement.BooleanValueType"/>
+            <xsd:element name="ComplexValue" type="FuGE.Common.Measurement.ComplexValueType"/>
+            <xsd:element name="Range" type="FuGE.Common.Measurement.RangeType"/>
+        </xsd:choice>
+    </xsd:group>
+    <xsd:group name="ContactGroup">
+        <xsd:choice>
+            <xsd:element name="Person" type="FuGE.Common.Audit.PersonType"/>
+            <xsd:element name="Organization" type="FuGE.Common.Audit.OrganizationType"/>
+        </xsd:choice>
+    </xsd:group>
+
+	<xsd:element name="ExternalData" type="FuGE.Bio.Data.ExternalDataType" abstract="false"/>
+	<xsd:element name="GenericMaterial" type="FuGE.Bio.Material.GenericMaterialType" abstract="false"/>
+	<xsd:element name="GenericMaterialMeasurement" type="FuGE.Bio.Material.GenericMaterialMeasurementType" abstract="false"/>
+	<xsd:element name="Person" type="FuGE.Common.Audit.PersonType" abstract="false"/>
+	<xsd:element name="Organization" type="FuGE.Common.Audit.OrganizationType" abstract="false"/>
+	<xsd:element name="ContactRole" type="FuGE.Common.Audit.ContactRoleType" abstract="false"/>
+	<xsd:element name="BibliographicReference" type="FuGE.Common.References.BibliographicReferenceType" abstract="false"/>
+	<xsd:element name="Database" type="FuGE.Common.References.DatabaseType" abstract="false"/>
+	<xsd:element name="DatabaseReference" type="FuGE.Common.References.DatabaseReferenceType" abstract="false"/>
+	<xsd:element name="Description" type="FuGE.Common.Description.DescriptionType" abstract="false"/>
+	<xsd:element name="userParam" type="FuGE.Common.Ontology.userParamType" abstract="false"/>
+	<xsd:element name="ParameterValue" type="FuGE.Common.Protocol.ParameterValueType" abstract="false"/>
+	<xsd:element name="EquipmentApplication" type="FuGE.Common.Protocol.EquipmentApplicationType" abstract="false"/>
+	<xsd:element name="SoftwareApplication" type="FuGE.Common.Protocol.SoftwareApplicationType" abstract="false"/>
+	<xsd:element name="ActionApplication" type="FuGE.Common.Protocol.ActionApplicationType" abstract="false"/>
+	<xsd:element name="GenericProtocol" type="FuGE.Common.Protocol.GenericProtocolType" abstract="false"/>
+	<xsd:element name="GenericAction" type="FuGE.Common.Protocol.GenericActionType" abstract="false"/>
+	<xsd:element name="GenericSoftware" type="FuGE.Common.Protocol.GenericSoftwareType" abstract="false"/>
+	<xsd:element name="GenericEquipment" type="FuGE.Common.Protocol.GenericEquipmentType" abstract="false"/>
+	<xsd:element name="GenericParameter" type="FuGE.Common.Protocol.GenericParameterType" abstract="false"/>
+	<xsd:element name="GenericProtocolApplication" type="FuGE.Common.Protocol.GenericProtocolApplicationType" abstract="false"/>
+	<xsd:element name="ParameterPair" type="FuGE.Common.Protocol.ParameterPairType" abstract="false"/>
+	<xsd:element name="cvParam" type="FuGE.Common.Ontology.cvParamType" abstract="false"/>
+	<xsd:element name="cv" type="FuGE.Common.Ontology.cvType" abstract="false"/>	
+	<xsd:element name="ReferenceableCollection" type="FuGE.Collection.ReferenceableCollectionType" abstract="false"/>
+	<xsd:element name="AuditCollection" type="FuGE.Collection.AuditCollectionType" abstract="false"/>
+	<xsd:element name="Provider" type="FuGE.Collection.ProviderType" abstract="false"/>
+	<xsd:element name="PropertyValue" type="FuGE.Common.Ontology.PropertyValue" abstract="false"/>
+</xsd:schema>
diff --git a/res/xsd/mzIdentML1.0.0.xsd b/res/xsd/mzIdentML1.0.0.xsd
new file mode 100644
index 0000000..e5ae8d6
--- /dev/null
+++ b/res/xsd/mzIdentML1.0.0.xsd
@@ -0,0 +1,1339 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- 
+mzIdentML version 1.0.0
+Distributed under the Creative Commons license http://creativecommons.org/licenses/by/2.0/. 
+-->
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns="http://psidev.info/psi/pi/mzIdentML/1.0" xmlns:psi-pi="http://psidev.info/psi/pi/mzIdentML/1.0" targetNamespace="http://psidev.info/psi/pi/mzIdentML/1.0" elementFormDefault="qualified" version="1.0.0">
+	<xsd:include schemaLocation="FuGElightv1.0.0.xsd"/>
+	<xsd:element name="mzIdentML" type="PSI-PI.Main.mzIdentMLType" abstract="false">
+		<xsd:unique name="PK_SEQ"> <!-- Each xsd:unique in this run requires @id to be unique among the elements its selector picks (elements without @id are ignored); several of these names are the refer targets of the keyrefs declared later in this element. -->
+			<xsd:selector xpath="./psi-pi:SequenceCollection/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_ANA">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_ANAPROTO">
+			<xsd:selector xpath="./psi-pi:AnalysisProtocolCollection/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAIN">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:Inputs/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAAD">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAADSIL">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAADSILSIR">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAADSILSIRPE">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAADPDL">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:ProteinDetectionList/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_DATAADPDLPAG">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:ProteinDetectionList/psi-pi:ProteinAmbiguityGroup/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_ANASW">
+			<xsd:selector xpath="./psi-pi:AnalysisSoftwareList/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_PROV">
+			<xsd:selector xpath="./psi-pi:Provider/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_AUDIT">
+			<xsd:selector xpath="./psi-pi:AuditCollection/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_SAMPLE">
+			<xsd:selector xpath="./psi-pi:AnalysisSampleCollection/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_CV">
+			<xsd:selector xpath="./psi-pi:cvList/*"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_FragMeasure">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:FragmentationTable/psi-pi:Measure"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PK_MassTable">
+			<xsd:selector xpath="./psi-pi:AnalysisProtocolCollection/psi-pi:SpectrumIdentificationProtocol/psi-pi:MassTable"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:unique name="PF_Bibref"> <!-- NOTE(review): the PF_ prefix differs from the PK_ prefix of every other unique in this element; presumably a typo, left unchanged because the constraint name is part of the published schema. -->
+			<xsd:selector xpath="./psi-pi:BibliographicReference"/>
+			<xsd:field xpath="@id"/>
+		</xsd:unique>
+		<xsd:keyref name="FK_SII_PEP" refer="psi-pi:PK_SEQ"> <!-- SpectrumIdentificationItem/@Peptide_ref must equal an @id collected by PK_SEQ (children of SequenceCollection). -->
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem"/> <!-- Every step carries the psi-pi: prefix: an unprefixed name in a selector matches only no-namespace elements, and this schema sets elementFormDefault="qualified". -->
+			<xsd:field xpath="@Peptide_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SIIPEV_DBSEQ" refer="psi-pi:PK_SEQ"> <!-- NOTE(review): same selector as FK_DATAADSILSIRSIIPEV_DBSEQ, which reads @DBSequence_ref (lower-case r). XML names are case-sensitive, so only one spelling can match the PeptideEvidence attribute; confirm against the PeptideEvidence type definition. -->
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem/psi-pi:PeptideEvidence"/>
+			<xsd:field xpath="@DBSequence_Ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_DBSEQ_SDB" refer="psi-pi:PK_DATAIN">
+			<xsd:selector xpath="./psi-pi:SequenceCollection/psi-pi:DBSequence"/>
+			<xsd:field xpath="@SearchDatabase_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SI_SIP" refer="psi-pi:PK_ANAPROTO">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:SpectrumIdentification"/>
+			<xsd:field xpath="@SpectrumIdentificationProtocol_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SI_SIL" refer="psi-pi:PK_DATAAD">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:SpectrumIdentification"/>
+			<xsd:field xpath="@SpectrumIdentificationList_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SISDB_SDB" refer="psi-pi:PK_DATAIN">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:SpectrumIdentification/psi-pi:SearchDatabase"/>
+			<xsd:field xpath="@SearchDatabase_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SISDB_SD" refer="psi-pi:PK_DATAIN"> <!-- InputSpectra/@SpectraData_ref must equal an @id collected by PK_DATAIN (children of DataCollection/Inputs). -->
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:SpectrumIdentification/psi-pi:InputSpectra"/> <!-- The SpectrumIdentificationType extension declares InputSpectra (attribute SpectraData_ref) and SearchDatabase as its children; it declares no SpectraData_ref child element, so a selector on that name matches nothing. -->
+			<xsd:field xpath="@SpectraData_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_PD_PDP" refer="psi-pi:PK_ANAPROTO">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:ProteinDetection"/>
+			<xsd:field xpath="@ProteinDetectionProtocol_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_PD_PDL" refer="psi-pi:PK_DATAAD">
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:ProteinDetection"/>
+			<xsd:field xpath="@ProteinDetectionList_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_PDSIL_SIL" refer="psi-pi:PK_DATAAD"> <!-- NOTE(review): selects a child element named SpectrumIdentificationList_ref and reads its ref attribute; confirm that the ProteinDetection type really declares a child with that name, otherwise this keyref never matches anything. -->
+			<xsd:selector xpath="./psi-pi:AnalysisCollection/psi-pi:ProteinDetection/psi-pi:SpectrumIdentificationList_ref"/>
+			<xsd:field xpath="@ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_APSIP_ASW" refer="psi-pi:PK_ANASW">
+			<xsd:selector xpath="./psi-pi:AnalysisProtocolCollection/psi-pi:SpectrumIdentificationProtocol"/>
+			<xsd:field xpath="@AnalysisSoftware_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_APPDP_ASW" refer="psi-pi:PK_ANASW">
+			<xsd:selector xpath="./psi-pi:AnalysisProtocolCollection/psi-pi:ProteinDetectionProtocol"/>
+			<xsd:field xpath="@AnalysisSoftware_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_DATAADSILSIR_SD" refer="psi-pi:PK_DATAIN">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumElement"/>
+			<xsd:field xpath="@SpectraData_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_DATAADSILSIRSII_PEP" refer="psi-pi:PK_SEQ">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem"/>
+			<xsd:field xpath="@Peptide_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_DATAADSILSIRSIIPEV_DBSEQ" refer="psi-pi:PK_SEQ"> <!-- NOTE(review): same selector as FK_SIIPEV_DBSEQ, which reads @DBSequence_Ref (capital R). XML names are case-sensitive, so only one spelling can match the PeptideEvidence attribute; confirm against the PeptideEvidence type definition. -->
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem/psi-pi:PeptideEvidence"/>
+			<xsd:field xpath="@DBSequence_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_DATAADPDLPAGPDHPH_PEPEV" refer="psi-pi:PK_DATAADSILSIRPE">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:ProteinDetectionList/psi-pi:ProteinAmbiguityGroup/psi-pi:ProteinDetectionHypothesis/psi-pi:PeptideHypothesis"/>
+			<xsd:field xpath="@PeptideEvidence_Ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SoftwareContact" refer="psi-pi:PK_AUDIT">
+			<xsd:selector xpath="./psi-pi:AnalysisSoftwareList/psi-pi:AnalysisSoftware/psi-pi:ContactRole"/>
+			<xsd:field xpath="@Contact_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_ProviderContact" refer="psi-pi:PK_AUDIT">
+			<xsd:selector xpath="./psi-pi:Provider/psi-pi:ContactRole"/>
+			<xsd:field xpath="@Contact_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_ProviderSoftware" refer="psi-pi:PK_ANASW">
+			<xsd:selector xpath="./psi-pi:Provider"/>
+			<xsd:field xpath="@Software_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_SampleContact" refer="psi-pi:PK_AUDIT">
+			<xsd:selector xpath="./psi-pi:AnalysisSampleCollection/psi-pi:Sample/psi-pi:ContactRole"/>
+			<xsd:field xpath="@Contact_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_subSamples" refer="psi-pi:PK_SAMPLE">
+			<xsd:selector xpath="./psi-pi:AnalysisSampleCollection/psi-pi:Sample/psi-pi:subSample"/>
+			<xsd:field xpath="@Sample_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_PDH_DBSeq" refer="psi-pi:PK_SEQ">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:ProteinDetectionList/psi-pi:ProteinAmbiguityGroup/psi-pi:ProteinDetectionHypothesis"/>
+			<xsd:field xpath="@DBSequence_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_FRAG_MEASURE" refer="psi-pi:PK_FragMeasure">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem/psi-pi:Fragmentation/psi-pi:IonType/psi-pi:FragmentArray"/>
+			<xsd:field xpath="@Measure_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_Sample" refer="psi-pi:PK_SAMPLE">
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem"/>
+			<xsd:field xpath="@Sample_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_MassTable" refer="psi-pi:PK_MassTable"> <!-- SpectrumIdentificationItem/@MassTable_ref must equal an @id collected by PK_MassTable (MassTable elements under SpectrumIdentificationProtocol). -->
+			<xsd:selector xpath="./psi-pi:DataCollection/psi-pi:AnalysisData/psi-pi:SpectrumIdentificationList/psi-pi:SpectrumIdentificationResult/psi-pi:SpectrumIdentificationItem"/> <!-- Every step carries the psi-pi: prefix: an unprefixed name in a selector matches only no-namespace elements, and this schema sets elementFormDefault="qualified". -->
+			<xsd:field xpath="@MassTable_ref"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist1" refer="psi-pi:PK_CV"> <!-- FK_UnitCVlist1..8: any element 1 to 8 levels below mzIdentML that carries @unitCvRef must reference an @id collected by PK_CV (children of cvList). One keyref per depth; elements nested deeper than 8 levels are not covered by these constraints. -->
+			<xsd:selector xpath="*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist2" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist3" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist4" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist5" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist6" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist7" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_UnitCVlist8" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*/*/*"/>
+			<xsd:field xpath="@unitCvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist1" refer="psi-pi:PK_CV"> <!-- FK_CVlist1..8: same depth-by-depth scheme as FK_UnitCVlist1..8, applied to the @cvRef attribute. -->
+			<xsd:selector xpath="*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist2" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist3" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist4" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist5" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist6" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist7" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+		<xsd:keyref name="FK_CVlist8" refer="psi-pi:PK_CV">
+			<xsd:selector xpath="*/*/*/*/*/*/*/*"/>
+			<xsd:field xpath="@cvRef"/>
+		</xsd:keyref>
+	</xsd:element>
+	<xsd:complexType name="cvListType"> <!-- Wrapper type: one or more cv elements (a reference to the global cv element; minOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element ref="cv" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="AnalysisSoftwareListType"> <!-- Wrapper type: one or more AnalysisSoftware elements (minOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="AnalysisSoftware" type="PSI-PI.analysis.search.AnalysisSoftwareType" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="AnalysisSampleCollectionType"> <!-- Wrapper type: one or more Sample elements (minOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="Sample" type="SampleType" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="SequenceCollectionType"> <!-- Zero or more DBSequence elements followed by zero or more Peptide elements; the sequence order is fixed, so every DBSequence precedes every Peptide, and an empty collection is valid. -->
+		<xsd:sequence>
+			<xsd:element name="DBSequence" type="PSI-PI.analysis.search.DBSequenceType" minOccurs="0" maxOccurs="unbounded"/>
+			<xsd:element name="Peptide" type="PSI-PI.polypeptide.PeptideType" minOccurs="0" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="AnalysisCollectionType"> <!-- One or more SpectrumIdentification elements followed by at most one ProteinDetection (maxOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="SpectrumIdentification" type="PSI-PI.analysis.search.SpectrumIdentificationType" maxOccurs="unbounded"/>
+			<xsd:element name="ProteinDetection" type="PSI-PI.analysis.process.ProteinDetectionType" minOccurs="0"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="AnalysisProtocolCollectionType"> <!-- One or more SpectrumIdentificationProtocol elements followed by at most one ProteinDetectionProtocol (maxOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="SpectrumIdentificationProtocol" type="PSI-PI.analysis.search.SpectrumIdentificationProtocolType" maxOccurs="unbounded"/>
+			<xsd:element name="ProteinDetectionProtocol" type="PSI-PI.analysis.process.ProteinDetectionProtocolType" minOccurs="0"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="InputsType"> <!-- Optional, repeatable SourceFile, SearchDatabase and SpectraData elements, in that fixed order; all three have minOccurs="0", so an empty Inputs element validates. -->
+		<xsd:sequence>
+			<xsd:element name="SourceFile" type="PSI-PI.analysis.search.SourceFileType" minOccurs="0" maxOccurs="unbounded"/>
+			<xsd:element name="SearchDatabase" type="PSI-PI.analysis.search.SearchDatabaseType" minOccurs="0" maxOccurs="unbounded"/>
+			<xsd:element name="SpectraData" type="PSI-PI.spectra.SpectraDataType" minOccurs="0" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="AnalysisDataType"> <!-- One or more SpectrumIdentificationList elements followed by at most one ProteinDetectionList (maxOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="SpectrumIdentificationList" type="PSI-PI.analysis.search.SpectrumIdentificationListType" maxOccurs="unbounded"/>
+			<xsd:element name="ProteinDetectionList" type="PSI-PI.analysis.process.ProteinDetectionListType" minOccurs="0"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="DataCollectionType"> <!-- Exactly one Inputs followed by exactly one AnalysisData; both are required because minOccurs and maxOccurs default to 1. -->
+		<xsd:sequence>
+			<xsd:element name="Inputs" type="InputsType">
+				<xsd:annotation>
+					<xsd:documentation>The inputs to the analyses including the databases searched, the spectral data and the source file converted to mzIdentML.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+			<xsd:element name="AnalysisData" type="AnalysisDataType">
+				<xsd:annotation>
+					<xsd:documentation>Data sets generated by the analyses, including peptide and protein lists.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.Main.mzIdentMLType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The upper-most hierarchy level of mzIdentML with sub-containers for example describing software, protocols and search results (spectrum identifications or protein detection results). </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element name="cvList" type="cvListType">
+						<xsd:annotation>
+							<xsd:documentation> The list of CVs used within the file</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AnalysisSoftwareList" type="AnalysisSoftwareListType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The software used to perform the analyses (specify at least name, manufacturer, version, URL). </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="Provider" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The Provider of the mzIdentML record in terms of the contact and software.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="AuditCollection" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The complete set of Contacts (people and organisations) for this file.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AnalysisSampleCollection" type="AnalysisSampleCollectionType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The samples analysed can optionally be recorded using CV terms for descriptions. If a composite...</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="SequenceCollection" type="SequenceCollectionType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The collection of sequences (DBSequence or Peptide) identified to be referenced in the results.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AnalysisCollection" type="AnalysisCollectionType">
+						<xsd:annotation>
+							<xsd:documentation> The analyses performed to get the results, which map the input and output data sets. Analyses are for example: SpectrumIdentification (resulting in peptides) or ProteinDetection (assemble proteins from peptides).</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AnalysisProtocolCollection" type="AnalysisProtocolCollectionType">
+						<xsd:annotation>
+							<xsd:documentation> The collection of protocols which include the parameters and settings of the performed analyses. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="DataCollection" type="DataCollectionType">
+						<xsd:annotation>
+							<xsd:documentation>The collection of input and output data sets of the analyses.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="BibliographicReference" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>Any bibliographic references associated with the file</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="creationDate" type="xsd:dateTime">
+					<xsd:annotation>
+						<xsd:documentation>The date on which the file was produced.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="version" type="psi-pi:versionRegex" use="required">
+					<xsd:annotation>
+						<xsd:documentation>The version of the schema this instance document refers to, in the format x.y.z. Changes to z should not prevent instance documents from validating. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SearchDatabaseType" abstract="false"> <!-- Extends FuGE.Bio.Data.ExternalDataType with a required DatabaseName, optional cvParam children and four optional attributes. Distinct from the unqualified SearchDatabaseType, which only carries a SearchDatabase_ref attribute. -->
+		<xsd:annotation>
+			<xsd:documentation> A database for searching mass spectra. Examples include a set of amino acid sequence entries, or annotated spectra libraries. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.ExternalDataType">
+				<xsd:sequence>
+					<xsd:element name="DatabaseName" type="ParamType">
+						<xsd:annotation>
+							<xsd:documentation>The database name may be given as a cvParam if it maps exactly to one of the release databases listed in the CV, otherwise a userParam should be used.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element ref="cvParam" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="version" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation>The version of the database.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="releaseDate" type="xsd:string"> <!-- Typed xsd:string, so the validator does not check any date format here. -->
+					<xsd:annotation>
+						<xsd:documentation>The release date of the database.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="numDatabaseSequences" type="xsd:long">
+					<xsd:annotation>
+						<xsd:documentation>The total number of sequences in the database.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="numResidues" type="xsd:long">
+					<xsd:annotation>
+						<xsd:documentation>The number of residues in the database.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SourceFileType" abstract="false"> <!-- Extends FuGE.Bio.Data.ExternalDataType by appending zero or more occurrences of the ParamGroup model group. -->
+		<xsd:annotation>
+			<xsd:documentation> A file from which this mzIdentML instance was created. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.ExternalDataType">
+				<xsd:sequence>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="ModificationParamsType"> <!-- Wrapper type: one or more SearchModification elements (minOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element name="SearchModification" type="PSI-PI.analysis.search.SearchModificationType" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="FilterType">
+		<xsd:sequence>
+			<xsd:element name="FilterType" type="ParamType">
+				<xsd:annotation>
+					<xsd:documentation>The type of filter e.g. database taxonomy filter, pi filter, mw filter </xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+			<xsd:element name="Include" type="ParamListType" minOccurs="0">
+				<xsd:annotation>
+					<xsd:documentation>All sequences fulfilling the specified criteria are included.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+			<xsd:element name="Exclude" type="ParamListType" minOccurs="0">
+				<xsd:annotation>
+					<xsd:documentation>All sequences fulfilling the specified criteria are excluded.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="DatabaseFiltersType"> <!-- Wrapper type: one or more Filter elements. NOTE: the "at least one of Include and Exclude" rule documented below is not enforced by the schema, because FilterType declares both children with minOccurs="0". -->
+		<xsd:sequence>
+			<xsd:element name="Filter" type="FilterType" maxOccurs="unbounded">
+				<xsd:annotation>
+					<xsd:documentation>The filter must include at least one of Include and Exclude. If both are used, it is assumed that inclusion is performed first.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="TranslationTableType"> <!-- Extends FuGE.Common.IdentifiableType by appending zero or more cvParam elements. -->
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="cvParam" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="DatabaseTranslationType">
+		<xsd:sequence>
+			<xsd:element name="TranslationTable" type="TranslationTableType" maxOccurs="unbounded">
+				<xsd:annotation>
+					<xsd:documentation>The table used to translate codons into amino acids, e.g. by reference to the NCBI translation table.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="frames" type="listOfAllowedFrames">
+			<xsd:annotation>
+				<xsd:documentation>The frames in which the nucleic acid sequence has been translated as a space separated list</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SpectrumIdentificationProtocolType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The parameters and settings of a SpectrumIdentification analysis. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolType">
+				<xsd:sequence>
+					<xsd:element name="SearchType" type="ParamType">
+						<xsd:annotation>
+							<xsd:documentation>The type of search performed e.g. PMF, Tag searches, MS-MS</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AdditionalSearchParams" type="ParamListType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The search parameters other than the modifications searched.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="ModificationParams" type="ModificationParamsType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation> The specification of static/variable modifications (e.g. Oxidation of Methionine) that are to be considered in the spectra search. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="Enzymes" type="PSI-PI.analysis.search.EnzymesType" minOccurs="0"/>
+					<xsd:element name="MassTable" type="PSI-PI.analysis.search.MassTableType" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:element name="FragmentTolerance" type="ToleranceType" minOccurs="0"/>
+					<xsd:element name="ParentTolerance" type="ToleranceType" minOccurs="0"/>
+					<xsd:element name="Threshold" type="ParamListType">
+						<xsd:annotation>
+							<xsd:documentation>The threshold(s) applied to determine that a result is significant. If multiple terms are used it is assumed that all conditions are satisfied by the passing results.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="DatabaseFilters" type="DatabaseFiltersType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The specification of filters applied to the database searched.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="DatabaseTranslation" type="DatabaseTranslationType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>A specification of how a nucleic acid sequence database was translated for searching.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="AnalysisSoftware_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The search algorithm used, given as a reference to the AnalysisSoftwareList section. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="InputSpectraType"> <!-- Empty-content type carrying a single attribute; SpectraData_ref is declared without use="required", so the validator treats it as optional. -->
+		<xsd:annotation>
+			<xsd:documentation> The attribute referencing an identifier within the SpectraData section. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="SpectraData_ref" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>A reference to the SpectraData element which locates the input spectra to an external file.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="SearchDatabaseType"> <!-- Reference-only type used by the SearchDatabase child of SpectrumIdentification; not to be confused with PSI-PI.analysis.search.SearchDatabaseType, which describes the database itself. SearchDatabase_ref has no use="required", so it is optional to the validator. -->
+		<xsd:attribute name="SearchDatabase_ref" type="xsd:string">
+			<xsd:annotation>
+				<xsd:documentation>A reference to the database searched.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SpectrumIdentificationType" abstract="false"> <!-- Extends FuGE.Common.Protocol.ProtocolApplicationType with one or more InputSpectra followed by one or more SearchDatabase, plus two required reference attributes. -->
+		<xsd:annotation>
+			<xsd:documentation> An Analysis which tries to identify peptides in input spectra, referencing the database searched, the input spectra, the output results and the protocol that is run. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolApplicationType">
+				<xsd:sequence>
+					<xsd:element name="InputSpectra" type="InputSpectraType" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> One of the spectra data sets used (can be several).</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="SearchDatabase" type="SearchDatabaseType" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation> One of the search databases used (can be several).</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="SpectrumIdentificationProtocol_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the search protocol used for this SpectrumIdentification. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="SpectrumIdentificationList_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the SpectrumIdentificationList produced by this analysis in the DataCollection section. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="MeasureType"> <!-- Extends FuGE.Common.IdentifiableType by appending one or more cvParam elements (minOccurs defaults to 1). -->
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element ref="cvParam" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="FragmentationTableType"> <!-- Wrapper type: one or more Measure elements; their @id values are what the PK_FragMeasure constraint on the mzIdentML element collects. -->
+		<xsd:sequence>
+			<xsd:element name="Measure" type="MeasureType" maxOccurs="unbounded">
+				<xsd:annotation>
+					<xsd:documentation>References to CV terms defining the measures about product ions to be reported in SpectrumIdentificationItem</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SpectrumIdentificationListType" abstract="false"> <!-- Extends FuGE.Bio.Data.InternalDataType with, in fixed order: zero or more ParamGroup occurrences, an optional FragmentationTable, then one or more SpectrumIdentificationResult elements. -->
+		<xsd:annotation>
+			<xsd:documentation> Represents the set of all search results from SpectrumIdentification. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.InternalDataType">
+				<xsd:sequence>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>Scores or output parameters associated with the SpectrumIdentificationList</xsd:documentation>
+						</xsd:annotation>
+					</xsd:group>
+					<xsd:element name="FragmentationTable" type="FragmentationTableType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>Contains the types of measures that will be reported in generic arrays for each SpectrumIdentificationItem e.g. product ion m/z, product ion intensity, product ion m/z error</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="SpectrumIdentificationResult" type="PSI-PI.analysis.search.SpectrumIdentificationResultType" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="numSequencesSearched" type="xsd:long"> <!-- Optional to the validator (no use="required"), matching the de novo exception described below. -->
+					<xsd:annotation>
+						<xsd:documentation>This value should be provided unless a de novo search has been performed.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="SpecificityRulesType"> <!-- Wrapper type: one or more cvParam elements (minOccurs defaults to 1). -->
+		<xsd:sequence>
+			<xsd:element ref="cvParam" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SearchModificationType" abstract="false"> <!-- A required ModParam followed by an optional SpecificityRules element, plus the required boolean attribute fixedMod. -->
+		<xsd:annotation>
+			<xsd:documentation> Specification of a search modification as parameter for a spectra search. Contains the name of the modification, the mass, the specificity and whether it is a static modification. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element name="ModParam" type="PSI-PI.polypeptide.ModParamType"/>
+			<xsd:element name="SpecificityRules" type="SpecificityRulesType" minOccurs="0">
+				<xsd:annotation>
+					<xsd:documentation>The specificity rules of the searched modification including for example the probability of a modification's presence or peptide or protein termini. Standard fixed or variable status should be provided by the attribute fixedMod.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="fixedMod" type="xsd:boolean" use="required">
+			<xsd:annotation>
+				<xsd:documentation> True, if the modification is static (i.e. occurs always). </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FragmentArrayType">
+		<xsd:attribute name="values" type="listOfFloats" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The values of this particular measure, corresponding to the index defined in ion type</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="Measure_ref" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>A reference to the Measure defined in the FragmentationTable</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="IonTypeType">
+		<xsd:sequence>
+			<xsd:element ref="cvParam">
+				<xsd:annotation>
+					<xsd:documentation>The type of ion identified.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+			<xsd:element name="FragmentArray" type="FragmentArrayType" minOccurs="0" maxOccurs="unbounded">
+				<xsd:annotation>
+					<xsd:documentation>An array of values for a given type of measure and for a particular ion type, in parallel to the index of ions identified.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="index" type="listOfIntegers">
+			<xsd:annotation>
+				<xsd:documentation>The index of ions identified as integers, following standard notation for a-c, x-z e.g. if b3 b5 and b6 have been identified, the index would store "3 5 6". For internal ions, the index contains pairs defining the start and end point - see specification document for examples. For immonium ions, the index is the position of the identified ion within the peptide sequence - if the peptide contains the same amino acid in multiple positions that cannot be distinguished, [...]
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="charge" type="xsd:int" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The charge of the identified fragmentation ions.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="FragmentationType">
+		<xsd:sequence>
+			<xsd:element name="IonType" type="IonTypeType" maxOccurs="unbounded">
+				<xsd:annotation>
+					<xsd:documentation>IonType defines the index of fragmentation ions being reported, importing a CV term for the type of ion e.g. b ion. Example: if b3 b7 b8 and b10 have been identified, the index attribute will contain 3 7 8 10, and the corresponding values will be reported in parallel arrays below</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SpectrumIdentificationItemType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> An identification of a single (poly)peptide, resulting from querying an input spectrum, along with the set of confidence values for that identification. PeptideEvidence elements should be given for all mappings of the corresponding Peptide sequence within protein sequences.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element name="PeptideEvidence" type="PSI-PI.analysis.process.PeptideEvidenceType" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:element name="Fragmentation" type="FragmentationType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The product ions identified in this result.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="chargeState" type="xsd:int" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The charge state of the identified peptide. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="experimentalMassToCharge" type="xsd:double" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The mass-to-charge value measured in the experiment in Daltons / charge. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="calculatedMassToCharge" type="xsd:double">
+					<xsd:annotation>
+						<xsd:documentation> The theoretical mass-to-charge value calculated for the peptide in Daltons / charge. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="calculatedPI" type="xsd:float">
+					<xsd:annotation>
+						<xsd:documentation> The calculated isoelectric point of the (poly)peptide, with relevant modifications included. Do not supply this value if the PI cannot be calculated properly. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="Peptide_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the identified (poly)peptide sequence in the Peptide element. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="rank" type="xsd:int" use="required">
+					<xsd:annotation>
+						<xsd:documentation>For an MS/MS result set, this is the rank of the identification quality as scored by the search engine. 1 is the top rank. If multiple identifications have the same top score, they should all be assigned rank =1. For PMF data, the rank attribute may be meaningless and values of rank = 0 should be given. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="passThreshold" type="xsd:boolean" use="required">
+					<xsd:annotation>
+						<xsd:documentation>Set to true if the producer of the file has deemed that the identification has passed a given threshold or been validated as correct. If no such threshold has been set, a value of true should be given for all results. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="MassTable_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation>A reference should be given to the MassTable used to calculate the sequenceMass only if more than one MassTable has been given</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="Sample_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation>A reference should be provided to link the SpectrumIdentificationItem to a Sample if more than one sample has been described in the AnalysisSampleCollection. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.SpectrumIdentificationResultType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> All identifications made from searching one spectrum. For PMF data, all peptide identifications will be listed underneath as SpectrumIdentificationItems. For MS/MS data, there will be ranked SpectrumIdentificationItems corresponding to possible different peptide IDs.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element name="SpectrumIdentificationItem" type="PSI-PI.analysis.search.SpectrumIdentificationItemType" maxOccurs="unbounded"/>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="spectrumID" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The locally unique id for the spectrum in the spectra data set specified by SpectraData_ref. External guidelines are provided on the use of consistent identifiers for spectra in different external formats.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="SpectraData_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to a spectra data set (e.g. a spectra file). </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="InputSpectrumIdentificationsType">
+		<xsd:attribute name="SpectrumIdentificationList_ref" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>A reference to the list of spectrum identifications that were input to the process.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.ProteinDetectionType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> An Analysis which assembles a set of peptides (e.g. from a spectra search analysis) to proteins. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolApplicationType">
+				<xsd:sequence>
+					<xsd:element name="InputSpectrumIdentifications" type="InputSpectrumIdentificationsType" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>The lists of spectrum identifications that are input to the protein detection process.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="ProteinDetectionList_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the ProteinDetectionList in the DataCollection section. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="ProteinDetectionProtocol_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> A reference to the detection protocol used for this ProteinDetection. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.ProteinDetectionProtocolType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The parameters and settings of a ProteinDetection process. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.ProtocolType">
+				<xsd:sequence>
+					<xsd:element name="AnalysisParams" type="ParamListType" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The parameters and settings for the protein detection given as CV terms. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="Threshold" type="ParamListType">
+						<xsd:annotation>
+							<xsd:documentation>The threshold(s) applied to determine that a result is significant. If multiple terms are used it is assumed that all conditions are satisfied by the passing results.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="AnalysisSoftware_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation> The protein detection software used, given as a reference to the SoftwareCollection section. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.ProteinDetectionListType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The protein list resulting from a protein detection process. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.InternalDataType">
+				<xsd:sequence>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>Scores or output parameters associated with the ProteinDetectionList</xsd:documentation>
+						</xsd:annotation>
+					</xsd:group>
+					<xsd:element name="ProteinAmbiguityGroup" type="PSI-PI.analysis.process.ProteinAmbiguityGroupType" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PeptideHypothesisType">
+		<xsd:attribute name="PeptideEvidence_Ref" type="xsd:string" use="required">
+			<xsd:annotation>
+				<xsd:documentation>A reference to the PeptideEvidence element on which this hypothesis is based.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.ProteinDetectionHypothesisType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A single result of the ProteinDetection analysis (i.e. a protein).</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element name="PeptideHypothesis" type="PeptideHypothesisType" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>Peptide evidence on which this ProteinHypothesis is based by reference to a PeptideEvidence element in a SpectrumIdentificationItem. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="DBSequence_ref" type="xsd:string">
+					<xsd:annotation>
+						<xsd:documentation>A reference to the corresponding DBSequence entry. This is optional and redundant, because the PeptideEvidence elements referenced from here also map to the DBSequence.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="passThreshold" type="xsd:boolean" use="required">
+					<xsd:annotation>
+						<xsd:documentation>Set to true if the producer of the file has deemed that the ProteinDetectionHypothesis has passed a given threshold or been validated as correct. If no such threshold has been set, a value of true should be given for all results. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.ProteinAmbiguityGroupType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation>A set of logically related results from a protein detection, for example to represent conflicting assignments of peptides to proteins.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:element name="ProteinDetectionHypothesis" type="PSI-PI.analysis.process.ProteinDetectionHypothesisType" maxOccurs="unbounded"/>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.polypeptide.ModificationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation>A molecule modification specification. If n modifications have been found on a peptide, there should be n instances of Modification. If multiple modifications are provided as cvParams, it is assumed that the modification is ambiguous i.e. one modification or another. If no CVParams are provided it is assumed that the delta has not been matched to a known modification. A neutral loss should be defined as an additional CVParam within Modification. If more complex info [...]
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+		</xsd:sequence>
+		<xsd:attribute name="location" type="xsd:int">
+			<xsd:annotation>
+				<xsd:documentation>Location of the modification within the peptide - position in peptide sequence, counted from the N-terminus residue, starting at position 1. Specific modifications to the N-terminus should be given the location 0. Modification to the C-terminus should be given as peptide length + 1.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="residues" type="listOfChars">
+			<xsd:annotation>
+				<xsd:documentation>Specification of the residue (amino acid) on which the modification occurs. If multiple values are given, it is assumed that the exact residue modified is unknown i.e. the modification is to ONE of the residues listed. Multiple residues would usually only be specified for PMF data.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="avgMassDelta" type="xsd:double">
+			<xsd:annotation>
+				<xsd:documentation> Atomic mass delta considering the natural distribution of isotopes in Daltons. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="monoisotopicMassDelta" type="xsd:double">
+			<xsd:annotation>
+				<xsd:documentation> Atomic mass delta when assuming only the most common isotope of elements in Daltons. </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.polypeptide.ModParamType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation>The modification searched for, sourced from e.g. UniMod and the mass delta</xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element ref="cvParam">
+				<xsd:annotation>
+					<xsd:documentation>The name of the modification imported from a relevant CV</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="massDelta" type="xsd:float" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The mass delta of the searched modification in Daltons</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="residues" type="listOfChars" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The residue(s) searched with the specified modification</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.polypeptide.PeptideType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> One (poly)peptide (a sequence with modifications).</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.ConceptualMolecule.ConceptualMoleculeType">
+				<xsd:sequence>
+					<xsd:element name="peptideSequence" type="sequence">
+						<xsd:annotation>
+							<xsd:documentation> The amino acid sequence of the (poly)peptide. If a substitution modification has been found, the original sequence should be reported. </xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="Modification" type="PSI-PI.polypeptide.ModificationType" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:element name="SubstitutionModification" type="PSI-PI.polypeptide.SubstitutionModificationType" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.polypeptide.SubstitutionModificationType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A modification where one residue is substituted by another (amino acid change). </xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="originalResidue" use="required">
+			<xsd:annotation>
+				<xsd:documentation> The original residue before replacement. </xsd:documentation>
+			</xsd:annotation>
+			<xsd:simpleType>
+				<xsd:restriction base="xsd:string">
+					<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ?\-]{1}"/>
+				</xsd:restriction>
+			</xsd:simpleType>
+		</xsd:attribute>
+		<xsd:attribute name="replacementResidue" use="required">
+			<xsd:annotation>
+				<xsd:documentation> The residue that replaced the originalResidue. </xsd:documentation>
+			</xsd:annotation>
+			<xsd:simpleType>
+				<xsd:restriction base="xsd:string">
+					<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ?\-]{1}"/>
+				</xsd:restriction>
+			</xsd:simpleType>
+		</xsd:attribute>
+		<xsd:attribute name="location" type="xsd:int">
+			<xsd:annotation>
+				<xsd:documentation>Location of the modification within the peptide - position in peptide sequence, counted from the N-terminus residue, starting at position 1. Specific modifications to the N-terminus should be given the location 0. Modification to the C-terminus should be given as peptide length + 1.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="avgMassDelta" type="xsd:double">
+			<xsd:annotation>
+				<xsd:documentation> Atomic mass delta considering the natural distribution of isotopes in Daltons. This should only be reported if the original amino acid is known i.e. it is not "X" </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="monoisotopicMassDelta" type="xsd:double">
+			<xsd:annotation>
+				<xsd:documentation> Atomic mass delta when assuming only the most common isotope of elements in Daltons. This should only be reported if the original amino acid is known i.e. it is not "X" </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.spectra.SpectraDataType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> A data set containing spectra data (consisting of one or more spectra). </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Data.ExternalDataType">
+				<xsd:sequence>
+					<xsd:element name="spectrumIDFormat" type="spectrumIDFormatType"/>					
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.AnalysisSoftwareType" abstract="false">
+		<xsd:annotation>
+			<xsd:documentation> The software used for performing the analyses. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.Protocol.SoftwareType">
+				<xsd:sequence>
+					<xsd:element name="SoftwareName" type="ParamType">
+						<xsd:annotation>
+							<xsd:documentation>The name of the analysis software package, sourced from a CV if available.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="Customizations" type="xsd:string" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>Any customizations to the software, such as alternative scoring mechanisms implemented, should be documented here as free text.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="URI" type="xsd:anyURI">
+					<xsd:annotation>
+						<xsd:documentation>URI of the analysis software e.g. manufacturer's website</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.EnzymeType">
+		<xsd:annotation>
+			<xsd:documentation>The details of an individual cleavage enzyme should be provided by giving a regular expression or a CV term if a "standard" enzyme cleavage has been performed.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element name="SiteRegexp" type="PSI-PI.analysis.search.SiteRegexpType" minOccurs="0"/>
+			<xsd:element name="EnzymeName" type="ParamListType" minOccurs="0">
+				<xsd:annotation>
+					<xsd:documentation>The name of the enzyme from a CV.</xsd:documentation>
+				</xsd:annotation>
+			</xsd:element>
+		</xsd:sequence>
+		<xsd:attribute name="id" type="xsd:string" use="required"/>
+		<xsd:attribute name="NTermGain" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>Element formula gained at NTerm.</xsd:documentation>
+			</xsd:annotation>
+			<xsd:simpleType>
+				<xsd:restriction base="xsd:string">
+					<xsd:pattern value="[A-Za-z0-9 ]+"/>
+				</xsd:restriction>
+			</xsd:simpleType>
+		</xsd:attribute>
+		<xsd:attribute name="CTermGain" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>Element formula gained at CTerm.</xsd:documentation>
+			</xsd:annotation>
+			<xsd:simpleType>
+				<xsd:restriction base="xsd:string">
+					<xsd:pattern value="[A-Za-z0-9 ]+"/>
+				</xsd:restriction>
+			</xsd:simpleType>
+		</xsd:attribute>
+		<xsd:attribute name="semiSpecific" type="xsd:boolean" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>Set to true if the enzyme cleaves semi-specifically (i.e. one terminus must cleave according to the rules, the other can cleave at any residue), false if the enzyme cleavage is assumed to be specific to both termini (allowing for any missed cleavages).</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="missedCleavages" type="xsd:int" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>The number of missed cleavage sites allowed by the search. The attribute must be provided if an enzyme has been used.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="minDistance" use="optional">
+			<xsd:annotation>
+				<xsd:documentation>Minimal distance for another cleavage (minimum: 1).</xsd:documentation>
+			</xsd:annotation>
+			<xsd:simpleType>
+				<xsd:restriction base="xsd:int">
+					<xsd:minInclusive value="1"/>
+				</xsd:restriction>
+			</xsd:simpleType>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.EnzymesType">
+		<xsd:annotation>
+			<xsd:documentation>The list of enzymes used in the experiment</xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element name="Enzyme" type="PSI-PI.analysis.search.EnzymeType" maxOccurs="unbounded"/>
+		</xsd:sequence>
+		<xsd:attribute name="independent" type="xsd:boolean">
+			<xsd:annotation>
+				<xsd:documentation>If there are multiple enzymes specified, this attribute is set to true if cleavage with different enzymes is performed independently</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="ResidueType">
+		<xsd:attribute name="Code" type="chars" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The single letter code for the residue.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+		<xsd:attribute name="Mass" type="xsd:float" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The residue mass in Daltons (not including any fixed modifications). </xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="AmbiguousResidueType">
+		<xsd:sequence>
+			<xsd:group ref="ParamGroup" maxOccurs="unbounded"/>
+		</xsd:sequence>
+		<xsd:attribute name="Code" type="chars" use="required">
+			<xsd:annotation>
+				<xsd:documentation>The single letter code of the ambiguous residue e.g. X.</xsd:documentation>
+			</xsd:annotation>
+		</xsd:attribute>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.search.MassTableType">
+		<xsd:annotation>
+			<xsd:documentation>The masses of residues used in the search.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+					<xsd:element name="Residue" type="ResidueType" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>The specification of a single residue within the mass table.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:element name="AmbiguousResidue" type="AmbiguousResidueType" minOccurs="0" maxOccurs="unbounded">
+						<xsd:annotation>
+							<xsd:documentation>Ambiguous residues e.g. X can be specified by the Code attribute and a set of parameters for example giving the different masses that will be used in the search.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+				</xsd:sequence>
+				<xsd:attribute name="msLevel" type="listOfIntegers" use="required">
+					<xsd:annotation>
+						<xsd:documentation>The MS spectrum that the MassTable refers to e.g. "1" for MS1 "2" for MS2 or "1 2" for MS1 or MS2</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="PSI-PI.analysis.process.PeptideEvidenceType">
+		<xsd:annotation>
+			<xsd:documentation>PeptideEvidence maps a spectrum identification to DBSequence in which such a peptide is located. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Common.IdentifiableType">
+				<xsd:sequence>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="DBSequence_Ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation>A reference to the sequence from which this identification has been made.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="start" type="xsd:int">
+					<xsd:annotation>
+						<xsd:documentation> Start position of the peptide inside the protein sequence, where the first amino acid of the protein sequence is position 1.  </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="end" type="xsd:int">
+					<xsd:annotation>
+						<xsd:documentation> The index position of the last amino acid of the peptide inside the protein sequence, where the first amino acid of the protein sequence is position 1. </xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="pre">
+					<xsd:annotation>
+						<xsd:documentation> Previous flanking residue. If the peptide is N-terminal, pre="-" and not pre="". If for any reason it is unknown (e.g. de novo), pre="?" should be used. </xsd:documentation>
+					</xsd:annotation>
+					<xsd:simpleType>
+						<xsd:restriction base="xsd:string">
+							<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ?\-]{1}"/>
+						</xsd:restriction>
+					</xsd:simpleType>
+				</xsd:attribute>
+				<xsd:attribute name="post">
+					<xsd:annotation>
+						<xsd:documentation> Post flanking residue. If the peptide is C-terminal, post="-" and not post="". If for any reason it is unknown (e.g. de novo), post="?" should be used. </xsd:documentation>
+					</xsd:annotation>
+					<xsd:simpleType>
+						<xsd:restriction base="xsd:string">
+							<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ?\-]{1}"/>
+						</xsd:restriction>
+					</xsd:simpleType>
+				</xsd:attribute>
+				<xsd:attribute name="TranslationTable_ref" type="xsd:string" use="optional">
+					<xsd:annotation>
+						<xsd:documentation>A reference to the translation table used if this is PeptideEvidence derived from nucleic acid sequence</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="frame" type="psi-pi:allowed_frames" use="optional">
+					<xsd:annotation>
+						<xsd:documentation>The translation frame of this sequence if this is PeptideEvidence derived from nucleic acid sequence</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="isDecoy" type="xsd:boolean" default="false">
+					<xsd:annotation>
+						<xsd:documentation>Set to true if the peptide is matched to a decoy sequence.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="missedCleavages" type="xsd:int" use="optional">
+					<xsd:annotation>
+						<xsd:documentation>Number of missed cleavage sites (not required if no enzyme has been used).</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="ToleranceType">
+		<xsd:annotation>
+			<xsd:documentation>The tolerance of the search given as a plus and minus value with units.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:sequence>
+			<xsd:element ref="cvParam" maxOccurs="unbounded"/>
+		</xsd:sequence>
+	</xsd:complexType>
+	<xsd:complexType name="spectrumIDFormatType">
+		<xsd:annotation><xsd:documentation>The format of the spectrum identifier within the source file</xsd:documentation></xsd:annotation>
+		<xsd:sequence>
+			<xsd:element ref="cvParam"/>
+		</xsd:sequence>
+	</xsd:complexType>	
+	<xsd:complexType name="PSI-PI.analysis.search.DBSequenceType">
+		<xsd:annotation>
+			<xsd:documentation>A database sequence from the specified SearchDatabase (nucleic acid or amino acid). If the sequence is nucleic acid, the source nucleic acid sequence should be given in the seq attribute rather than a translated sequence.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.ConceptualMolecule.ConceptualMoleculeType">
+				<xsd:sequence>
+					<xsd:element name="seq" type="sequence" minOccurs="0">
+						<xsd:annotation>
+							<xsd:documentation>The actual sequence of amino acids or nucleic acid.</xsd:documentation>
+						</xsd:annotation>
+					</xsd:element>
+					<xsd:group ref="ParamGroup" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+				<xsd:attribute name="length" type="xsd:int">
+					<xsd:annotation>
+						<xsd:documentation>The length of the sequence as a number of bases or residues.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="SearchDatabase_ref" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation>The source database of this sequence.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+				<xsd:attribute name="accession" type="xsd:string" use="required">
+					<xsd:annotation>
+						<xsd:documentation>The unique accession of this sequence.</xsd:documentation>
+					</xsd:annotation>
+				</xsd:attribute>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="SampleType">
+		<xsd:annotation>
+			<xsd:documentation>A description of the sample analysed by mass spectrometry using CVParams or UserParams. If a composite sample has been analysed, a parent sample should be defined, which references subsamples. </xsd:documentation>
+		</xsd:annotation>
+		<xsd:complexContent>
+			<xsd:extension base="FuGE.Bio.Material.MaterialType">
+				<xsd:sequence>
+					<xsd:element name="subSample" type="subSampleType" minOccurs="0" maxOccurs="unbounded"/>
+				</xsd:sequence>
+			</xsd:extension>
+		</xsd:complexContent>
+	</xsd:complexType>
+	<xsd:complexType name="subSampleType">
+		<xsd:annotation>
+			<xsd:documentation>References to the individual component samples within a mixed parent sample.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:attribute name="Sample_ref" type="xsd:string" use="required"/>
+	</xsd:complexType>
+	<xsd:simpleType name="PSI-PI.analysis.search.SiteRegexpType">
+		<xsd:annotation>
+			<xsd:documentation>Regular expression for specifying the enzyme cleavage site.</xsd:documentation>
+		</xsd:annotation>
+		<xsd:restriction base="xsd:string"/>
+	</xsd:simpleType>
+	<xsd:simpleType name="listOfIntegers">
+		<xsd:list itemType="xsd:integer"/>
+	</xsd:simpleType>
+	<xsd:simpleType name="listOfFloats">
+		<xsd:list itemType="xsd:float"/>
+	</xsd:simpleType>
+	<xsd:simpleType name="listOfChars">
+		<xsd:list itemType="chars"/>
+	</xsd:simpleType>
+	<xsd:simpleType name="chars">
+		<xsd:restriction base="xsd:string">
+			<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ]{1}"/>
+		</xsd:restriction>
+	</xsd:simpleType>
+	<xsd:simpleType name="sequence">
+		<xsd:restriction base="xsd:string">
+			<xsd:pattern value="[ABCDEFGHIJKLMNOPQRSTUVWXYZ]*"/>
+		</xsd:restriction>
+	</xsd:simpleType>
+	<xsd:simpleType name="allowed_frames">
+		<xsd:restriction base="xsd:int">
+			<xsd:enumeration value="3"/>
+			<xsd:enumeration value="2"/>
+			<xsd:enumeration value="1"/>
+			<xsd:enumeration value="-3"/>
+			<xsd:enumeration value="-2"/>
+			<xsd:enumeration value="-1"/>
+		</xsd:restriction>
+	</xsd:simpleType>
+	<xsd:simpleType name="listOfAllowedFrames">
+		<xsd:list itemType="psi-pi:allowed_frames"/>
+	</xsd:simpleType>
+	<xsd:simpleType name="versionRegex">
+		<xsd:restriction base="xsd:string">
+			<xsd:pattern value="(1\.0\.\d+)"/>
+		</xsd:restriction>
+	</xsd:simpleType>
+</xsd:schema>
diff --git a/src/de/rki/ng4/ipig/GeneControl.java b/src/de/rki/ng4/ipig/GeneControl.java
new file mode 100644
index 0000000..940f35e
--- /dev/null
+++ b/src/de/rki/ng4/ipig/GeneControl.java
@@ -0,0 +1,397 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Properties;
+import java.util.Map.Entry;
+
+import de.rki.ng4.ipig.data.Gene;
+import de.rki.ng4.ipig.data.GeneSet;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+import de.rki.ng4.ipig.tools.Translator;
+
+/**
+ * <p>The GeneControl sorts out genes with impractical or inconvenient properties.</p>
+ * 
+ * <p>In first place, the gene annotation has to be consistent with the corresponding aminoacid sequence (reference).</p>
+ * 
+ * <p>The criteria are:</p>
+ * <p>1. Is the cds length congruent to 0 mod 3?</p>
+ * <p>2. Does the expression of the cds lead to a peptide similar to the provided reference?</p>
+ * <p>3. What is the position of the stop codon?</p> 
+ * 
+ * <p>There will be different output files, one containing the useful gene annotations, one containing the useless ones
+ * and a log file with some hints/explanations regarding the rejected annotations. The first file is suitable for the mapping.</p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class GeneControl {
+
+	private static Translator trans;
+	private static double simThresh;
+
+	/**
+	 * Runs the gene control with the currently loaded configuration.
+	 *
+	 * <p>Reads the similarity threshold, loads the gene annotations, rejects non-coding genes and
+	 * genes on uncommon chromosomes and then checks the remaining genes part by part against their
+	 * reference aa sequences. Accepted genes are written to "&lt;annotation&gt;_good.txt", rejected ones to
+	 * "&lt;annotation&gt;_bad.txt" and the reasons to "&lt;annotation&gt;_control.log" within the output path.</p>
+	 *
+	 * @throws ExitException if a prerequisite check fails or an output file can't be written
+	 */
+	public void run() throws ExitException {
+		checkFiles();
+
+		Info info = new Info("start gene control");
+		System.out.println();
+
+		// parameter parsing
+		try{
+			simThresh = Double.parseDouble(Configurator.getProperty("minSimilarity", "0.95"));
+			// a value outside (0,1] (or NaN) is as unusable as an unparsable one -> same fallback as below
+			if (!(simThresh > 0 && simThresh <= 1))
+				throw new NumberFormatException("minSimilarity=" + simThresh + " is not within (0,1]");
+		}
+		catch (NumberFormatException e){
+			simThresh = 0.95;
+			String message = "error:\tunsuitable similarity threshold (" + e.getMessage() + ")\n" 
+					+ "\tuse default value instead (minSimilarity=" + simThresh + ")";
+			System.out.println(message);
+			Logger.write(Configurator.getSysProperty("msPepSetName", "genecontrol")+".log", message);
+		}
+
+		// get filenames
+
+		String geneAnnoFile	= Configurator.getProperty("FgeneAnnoFile");
+		File gaFile = new File(geneAnnoFile);
+		// strip the extension, but tolerate annotation files without any dot in their name
+		String gaName = gaFile.getName();
+		int dot = gaName.lastIndexOf(".");
+		String gaFileName = (dot >= 0) ? gaName.substring(0, dot) : gaName;
+		String geneAaSeqFile = Configurator.getProperty("FgeneAaSeqFile");
+
+		String outputPath = new File(Configurator.getProperty("FoutputPath","")).getAbsolutePath() + "/";
+
+		// load annotations
+		Info.setInfomode(true);
+		Gene.setStrict(false);
+		GeneSet genes = new GeneSet(geneAnnoFile);
+		Info.setInfomode(false);
+
+		// declared outside the try, so the finally block can release them on every exit path
+		BufferedWriter logBuffer = null;
+		BufferedWriter goodBuffer = null;
+		BufferedWriter badBuffer = null;
+
+		try {
+			// preparing output files
+			logBuffer = new BufferedWriter (new FileWriter(new File(outputPath + gaFileName + "_control.log")));
+
+			goodBuffer = new BufferedWriter (new FileWriter(new File(outputPath + gaFileName + "_good.txt")));
+			goodBuffer.write(genes.getHeader());
+			goodBuffer.newLine();
+
+			badBuffer = new BufferedWriter (new FileWriter(new File(outputPath + gaFileName + "_bad.txt")));
+			badBuffer.write(genes.getHeader());
+			badBuffer.newLine();
+
+			System.out.println("check for non-coding genes and uncommon chromosomes");
+			checkNonCoding(genes, badBuffer, logBuffer);
+			checkUncommonChroms(genes, badBuffer, logBuffer);
+
+			// calc. gene subset size to avoid heap overflow:
+			// divide as long first (casting the byte count to int overflows from 2 GiB on)
+			// and use at least one gene per part (a part size of 0 would never terminate)
+			long freeMiB = Runtime.getRuntime().freeMemory() / (1024L*1024L);
+			int max = (int) Math.max(1L, Math.min(freeMiB, (long) Integer.MAX_VALUE));
+			int size = genes.size();
+			int parts = (int) Math.ceil((double) size / (double) max);
+			System.out.println("check integrity in " + parts + " parts with " + max + " genes each");
+
+			GeneSet subset;
+
+			if (trans == null)
+				trans = new Translator();
+
+			// loading and checking sequences for each subset
+			for (int i=0; i<parts; i++){
+				Configurator.checkBreak();
+
+				System.out.println("part " + (i+1) + " of " + parts + "...");
+				// getting the subset
+				subset = new GeneSet(genes.getSubset(i*max, i*max+max));
+				// loading the sequences
+				subset.readAaSequences(geneAaSeqFile);
+				subset.readNaSequences(Configurator.getProperty("refSeqPath"));
+
+				String expr;
+				String message;
+				int stop;
+				double similarity;
+				for (Gene gene : subset.getAll()){
+					// checking the gene
+
+					expr = getExpression(gene);
+
+					// case 0: expr == null if the cds is not congruent 0 mod 3, so it couldn't be translated correctly (regarding the codons)
+					if (expr != null){
+						stop = checkStopPos(expr);
+						similarity = checkSimilarity(expr, gene.getAaSequence());
+
+						// case 1: stop at cds end and quite similar to reference
+						if (stop == 0 && similarity >= simThresh){
+							good(goodBuffer, gene);
+						}
+						/* case 2: cds excludes stop but quite similar to reference -> useful after cds extension by one codon:
+						 * usually, the last codon in the cds is a stop codon, which is not included in the aa seq, so cds-length - 1 == aa-seq-length.
+						 * but if the stop codon is not within the cds (e.g. it's unknown or cds is incomplete), cds-length == aa-seq-length.
+						 * this would cause some trouble within the mapping calculations (coordinate transformations of reverse strand mappings in particular).
+						 * So extending the cds by one codon gives us cds-length - 1 == aa-seq-length.
+						 * Indeed, there is no new stop codon now, so you shouldn't use these "adapted" annotations for something else than the mapper.
+						 */
+						else if (stop == 1 && similarity >= simThresh){
+							extendCds(gene);
+							message = "cds excludes stop -> extended by one codon";
+							log(logBuffer, gene, expr, message);
+							good(goodBuffer, gene);
+						}
+						// case 3: stop occurs before cds end -> rejected
+						// might be correctable if reference matches expression up to the stop,
+						// but probably hard to consider introns
+						else if (stop == -1){
+							message = "a stop before cds end";
+							log(logBuffer, gene, expr, message);
+							bad(badBuffer, gene);
+						}
+						// case 4: similarity of 0 means different length compared to reference -> rejected
+						else if (similarity == 0){
+							message = "deviation to reference in length";
+							log(logBuffer, gene, expr, message);
+							bad(badBuffer, gene);
+						}
+						// case 5: if similarity is too low, gene might not be trustworthy, well explored or annotated -> rejected
+						else if (stop == 0 && similarity < simThresh){
+							message = "similarity to reference < " + simThresh + " (e.g. to many SNPs etc.)";
+							log(logBuffer, gene, expr, message);
+							bad(badBuffer, gene);
+						}
+						else{
+							message = "uncategorized case (stop = " + stop + ", similarity = " + similarity + ")";
+							log(logBuffer, gene, expr, message);
+							bad(badBuffer, gene);
+						}
+					}
+					else{
+						message = "cds not concruent mod 3 (no expression)";
+						log(logBuffer, gene, expr, message);
+						bad(badBuffer, gene);
+					}
+				}
+
+				subset.deleteAaSequences();
+				subset.deleteNaSequences();
+				subset = null;
+				System.gc();
+			}
+
+			// regular close: a failing final flush is still reported as an ExitException
+			logBuffer.close();
+			goodBuffer.close();
+			badBuffer.close();
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		} finally {
+			// no-op after the regular close above, releases the files if something went wrong in between
+			closeQuietly(logBuffer);
+			closeQuietly(goodBuffer);
+			closeQuietly(badBuffer);
+		}
+
+		Info.setInfomode(true);
+		info.stop();
+		Info.setInfomode(false);
+	}
+
+	// closes a writer (if any) and ignores close failures; only used for cleanup on exit paths
+	private static void closeQuietly(BufferedWriter writer){
+		if (writer == null)
+			return;
+		try {
+			writer.close();
+		} catch (IOException ignored) {
+			// nothing useful left to do, the primary error (if any) is already on its way to the caller
+		}
+	}
+
+	// Moves all genes with identical cds start and cds end out of the set:
+	// each one is noted in the log, written to the bad file and removed from genes.
+	private void checkNonCoding(GeneSet genes, BufferedWriter badBuffer, BufferedWriter logBuffer) throws ExitException {
+		final String message = "non-coding gene";
+		Iterator<Gene> geneIt = genes.getAll().iterator();
+		while (geneIt.hasNext()){
+			Gene candidate = geneIt.next();
+			if (candidate.getCdsStart() != candidate.getCdsEnd()){
+				// has a coding region -> stays in the set
+				continue;
+			}
+			try {
+				logBuffer.newLine();
+				logBuffer.write(candidate.getName() + ": " + message);
+				logBuffer.newLine();
+				bad(badBuffer, candidate);
+			} catch (IOException ioProblem) {
+				throw new ExitException(ioProblem.getMessage());
+			}
+			geneIt.remove();
+		}
+		System.gc();
+	}
+
+	// Moves all genes whose chromosome name doesn't look like chr<number>, chrX, chrY, chrM
+	// or chr<roman numeral> out of the set: each one is noted in the log, written to the bad file
+	// and removed from genes.
+	private void checkUncommonChroms(GeneSet genes, BufferedWriter badBuffer, BufferedWriter logBuffer) throws ExitException{
+		Iterator<Gene> geneIt = genes.getAll().iterator();
+		while (geneIt.hasNext()){
+			Gene candidate = geneIt.next();
+			boolean common = candidate.getChrom().matches("chr(\\d+|X|Y|M|[IVX]+)");
+			if (common){
+				continue;
+			}
+			String message = "uncommon chromosome (" + candidate.getChrom() + ")";
+			try {
+				logBuffer.newLine();
+				logBuffer.write(candidate.getName() + ": " + message);
+				logBuffer.newLine();
+				bad(badBuffer, candidate);
+			} catch (IOException ioProblem) {
+				throw new ExitException(ioProblem.getMessage());
+			}
+			geneIt.remove();
+		}
+		System.gc();
+	}
+
+	// Appends one entry to the log file: an empty line, the gene name with the message,
+	// then the reference aa sequence and the computed expression on separate lines.
+	private void log(BufferedWriter logBuffer, Gene gene, String expr, String message) throws IOException{
+		final String headline = gene.getName() + ": " + message;
+		logBuffer.newLine();
+		logBuffer.write(headline);
+		logBuffer.newLine();
+
+		final String referenceLine = ">reference:  " + gene.getAaSequence();
+		logBuffer.write(referenceLine);
+		logBuffer.newLine();
+
+		final String expressionLine = ">expression: " + expr;
+		logBuffer.write(expressionLine);
+		logBuffer.newLine();
+	}
+
+	// Appends the textual form of an accepted (useful) gene as one line to the good file.
+	private void good(BufferedWriter goodBuffer, Gene gene) throws IOException{
+		final String annotationLine = gene.toString();
+		goodBuffer.write(annotationLine);
+		goodBuffer.newLine();
+	}
+
+	// Appends the textual form of a rejected (non-useful) gene as one line to the bad file.
+	private void bad(BufferedWriter badBuffer, Gene gene) throws IOException{
+		final String annotationLine = gene.toString();
+		badBuffer.write(annotationLine);
+		badBuffer.newLine();
+	}
+
+	// Extends a gene's cds by one codon (3 bases): the cds end is shifted by +3 on the '+' strand,
+	// otherwise the cds start is shifted by -3.
+	private void extendCds(Gene gene){
+		if (gene.getStrand() != '+'){
+			gene.shiftCdsStart(-3);
+			return;
+		}
+		gene.shiftCdsEnd(3);
+	}
+
+	// Checks where the first stop (*) occurs within the expression:
+	//   0 -> the first stop is the last character (cds includes the stop at its end)
+	//  -1 -> the first stop occurs before the last character
+	//   1 -> there is no stop at all (cds excludes the stop)
+	// The position is searched directly instead of splitting at "*", because String.split drops
+	// trailing empty strings and returns an empty array for expressions consisting of stops only.
+	private int checkStopPos(String expr){
+		int firstStop = expr.indexOf('*');
+
+		// cds excludes stop
+		if (firstStop < 0){
+			return 1;
+		}
+		// cds includes stop, but not at the end
+		if (firstStop < expr.length()-1){
+			return -1;
+		}
+		// cds includes stop at the end (firstStop == expr.length()-1)
+		return 0;
+	}
+
+	// Returns the expression of a gene, i.e. the peptide its cds translates to.
+	// For genes not on the '+' strand the reversed and complemented cds is translated.
+	// Returns null if the cds length is not congruent 0 mod 3.
+	private String getExpression(Gene gene) {
+		final boolean forward = gene.getStrand() == '+';
+		final String cds = forward
+				? gene.getCds()
+				: Translator.complement(new StringBuffer(gene.getCds()).reverse().toString());
+
+		if (!checkCongruence3(cds)){
+			return null;
+		}
+		return trans.dnaToPeptide(cds);
+	}
+
+	// Tells whether the cds length is a multiple of 3, i.e. whether it consists of complete codons.
+	private boolean checkCongruence3(String cds){
+		final int remainder = cds.length() % 3;
+		return remainder == 0;
+	}
+
+	// Computes the similarity between the expression (up to the first stop, if included) and the aa sequence.
+	// The similarity is the fraction of identical positions, between 0 (lowest) and 1 (highest).
+	// If both differ in length, the similarity is 0.
+	// The expression is cut at the first '*' by position instead of splitting at "*", because String.split
+	// returns an empty array for expressions consisting of stops only.
+	// NOTE(review): if the cut expression and the aa sequence are both empty the result is NaN (0/0),
+	// which ends up in the caller's "uncategorized" branch - confirm that this is intended.
+	private double checkSimilarity(String expr, String aaseq){
+		int firstStop = expr.indexOf('*');
+		if (firstStop >= 0)
+			expr = expr.substring(0, firstStop);
+
+		double distance = hammingDistance(expr, aaseq);
+		double similarity;
+
+		if (distance > -1)
+			similarity = (expr.length()-distance)/expr.length();
+		else
+			similarity = 0;
+
+		return similarity;
+	}
+
+	// A simple metric for the similarity computation:
+	// for sequences of the same length (according to checkLength) every differing position counts one,
+	// for sequences of different length the result is -1.
+	private int hammingDistance(String expr, String aaseq){
+		if (checkLength(expr, aaseq)){
+			int mismatches = 0;
+			int pos = 0;
+			while (pos < expr.length()){
+				if (expr.charAt(pos) != aaseq.charAt(pos)){
+					mismatches++;
+				}
+				pos++;
+			}
+			return mismatches;
+		}
+		return -1;
+	}
+
+	// Compares the length of the expression (up to the first stop, if included) with the length of the aa sequence.
+	// The length is taken from the position of the first '*' instead of splitting at "*", because String.split
+	// returns an empty array for expressions consisting of stops only.
+	private boolean checkLength(String expr, String aaseq){
+		int firstStop = expr.indexOf('*');
+		int exprLength = (firstStop >= 0) ? firstStop : expr.length();
+		return exprLength == aaseq.length();
+	}
+
+	// Checks the configuration before the gene control starts:
+	// first all mandatory keys have to be present (all missing ones are reported in one ExitException),
+	// an empty FoutputPath entry is removed, and finally every property whose key contains "File"
+	// has to name an existing file and every property whose key contains "Path" an existing directory.
+	private static void checkFiles() throws ExitException{
+		Properties props = Configurator.getProperties();
+
+		// mandatory keys and the complaint for each missing one (same order)
+		String[] mandatoryKeys = {"FgeneAnnoFile", "FgeneAaSeqFile", "refSeqPath"};
+		String[] complaints = {
+				"no gene annotation file specified",
+				"no gene aa-sequence file specified",
+				"no chromosome path specified"};
+
+		StringBuffer message = new StringBuffer();
+		boolean failed = false;
+		for (int k=0; k<mandatoryKeys.length; k++){
+			if (props.containsKey(mandatoryKeys[k])){
+				continue;
+			}
+			if (failed) message.append("\n");
+			failed = true;
+			message.append(complaints[k]);
+		}
+
+		boolean emptyOutputPath = props.containsKey("FoutputPath") && props.getProperty("FoutputPath").matches("");
+		if (emptyOutputPath){
+			Configurator.removeProperty("FoutputPath");
+		}
+
+		if (failed){
+			throw new ExitException(message.toString());
+		}
+
+		for (Entry<Object, Object> entry : Configurator.getProperties().entrySet()){
+			String key = (String) entry.getKey();
+			if (key.contains("File")){
+				checkFile(new File((String) entry.getValue()));
+			}
+			if (key.contains("Path")){
+				File dir = new File((String) entry.getValue());
+				if (!dir.isDirectory()){
+					throw new ExitException("error: can't find path (" + dir.getAbsolutePath() + ")");
+				}
+			}
+		}
+	}
+
+	// Makes sure that the given path denotes an existing regular file,
+	// otherwise an ExitException naming the absolute path is thrown.
+	private static void checkFile(File test) throws ExitException{
+		if (test.isFile()){
+			return;
+		}
+		throw new ExitException("error: can't find file (" + test.getAbsolutePath() + ")");
+	}
+}
diff --git a/src/de/rki/ng4/ipig/Ipig.java b/src/de/rki/ng4/ipig/Ipig.java
new file mode 100644
index 0000000..b61036b
--- /dev/null
+++ b/src/de/rki/ng4/ipig/Ipig.java
@@ -0,0 +1,311 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import de.rki.ng4.ipig.data.Exporter;
+import de.rki.ng4.ipig.data.GeneSet;
+import de.rki.ng4.ipig.data.PeptideSet;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.gui.Downloader;
+import de.rki.ng4.ipig.gui.GeneControlGui;
+import de.rki.ng4.ipig.gui.IpigGui;
+import de.rki.ng4.ipig.mapping.Mapper;
+import de.rki.ng4.ipig.mapping.Validator;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+
+/**
+ * Program entry point, parsing of parameters and loading of configuration file
+ * 
+ * @author Mathias
+ *
+ */
+public class Ipig {
+
+	private static boolean idmapfile1 = true;
+	private static boolean idmapfile2 = true; 
+
+	/**
+	 * Program entry point: parses the command-line arguments (incl. loading the configuration)
+	 * and starts the first requested mode out of gui, gene control, gene control gui and downloader;
+	 * without any of them the command-line ipig is run. The message of an ExitException is
+	 * printed to standard out.
+	 *
+	 * @param args the command-line arguments
+	 */
+	public static void main (String[] args){
+		try {
+			parseArguments(args);
+
+			// check program start
+			if (modeRequested("gui")){
+				IpigGui.main(null);
+			}
+			else if (modeRequested("control")){
+				new GeneControl().run();
+			}
+			else if (modeRequested("controlgui")){
+				GeneControlGui.main(null);
+			}
+			else if (modeRequested("downloader")){
+				Downloader.main(null);
+			}
+			else{
+				run();
+			}
+		} 
+		catch (ExitException e) {
+			System.out.println("\n" + e.getMessage());
+		}
+		finally{
+			System.gc();
+		}
+	}
+
+	// tells whether the system property of the given mode is set to "true"
+	private static boolean modeRequested(String mode) throws ExitException{
+		return Configurator.getSysProperty(mode, "false").matches("true");
+	}
+
+	/**
+	 * Starts the command-line version of the ipig.
+	 * 
+	 * <p>The steps are: set the info mode, check the configured files, init the log file and the
+	 * exporter, read the peptides (plus the optional id mapping and protein fasta files), map the
+	 * peptides against the filtered genes (annotation mapping) and against all genes (alternative
+	 * mapping), write the mapped/unmapped peptides and the BED and GFF3 exports and, only if the
+	 * "validation" property is "true", run the development validation.</p>
+	 * 
+	 * @throws ExitException if a prerequisite check fails or if no peptide could be mapped
+	 */
+	public static void run() throws ExitException{
+		// info mode stays on unless the configuration explicitly says "false"
+		if (Configurator.getProperty("infomode", "true").matches("false"))
+			Info.setInfomode(false);
+		else
+			Info.setInfomode(true);
+		
+		// also decides (via idmapfile1/idmapfile2) whether the optional id mapping inputs are used below
+		checkFiles();
+
+		// init a log file
+		Logger.init(Configurator.getSysProperty("msPepSetName", "ipig")+".log", false);
+
+		// init an Exporter
+		// this is done early, because its constructor checks some parameters
+		Exporter expo = new Exporter();
+
+		// outputPath preparation
+		String outputPath = new File(Configurator.getProperty("outputPath","")).getAbsolutePath() + "/";
+
+		Info info = new Info("start ipig");
+		System.out.println();
+
+		// read peptides
+		PeptideSet mspeps = new PeptideSet(Configurator.getSysProperty("msPepFile"));
+		// try some id mappings
+		if (idmapfile1)	mspeps.readProteinIds(Configurator.getProperty("protMapFile"));
+		if (idmapfile2) mspeps.readFasta(Configurator.getProperty("protSeqFile"));
+
+		// presumably the ids of the proteins referenced by the peptides - verify against PeptideSet.getCommonProts
+		Set<String> filter = mspeps.getCommonProts("id").keySet();
+		// read genes with filter and do annotation mapping
+		GeneSet ucscgenes = new GeneSet(Configurator.getProperty("geneAnnoFile"),filter);
+		ucscgenes.readAaSequences(Configurator.getProperty("geneAaSeqFile"));
+		Mapper.annotationMapping(mspeps, ucscgenes);
+		ucscgenes.removeUnused();
+		ucscgenes.deleteAaSequences();
+
+		// read all genes and do alternative mapping
+		GeneSet allgenes = new GeneSet(Configurator.getProperty("geneAnnoFile"));
+		allgenes.readAaSequences(Configurator.getProperty("geneAaSeqFile"));
+		Mapper.alternativeMapping(mspeps, allgenes);
+		allgenes.removeUnused();
+		allgenes.deleteAaSequences();
+
+		mspeps.sortPeptides();
+		// nothing to export if every peptide is still unmapped
+		if (mspeps.getUnmapped().size() == mspeps.size()){
+			throw new ExitException("no peptide mapped");
+		}
+
+		// write the output files
+		mspeps.writeMapped(outputPath, Configurator.getSysProperty("msPepSetName"));
+		mspeps.writeUnmapped(outputPath, Configurator.getSysProperty("msPepSetName"));
+		expo.bed(mspeps, outputPath, Configurator.getSysProperty("msPepSetName"));
+		expo.gff3(mspeps, outputPath, Configurator.getSysProperty("msPepSetName"));
+
+		//*** This part is just a validation for development testing
+		if (Configurator.getProperty("validation", "false").matches("true")){
+			// merge both gene sets and drop the second reference before loading nucleotide sequences
+			ucscgenes.addGenes(allgenes.getAll());
+			allgenes = null;
+			System.gc();
+
+			// validate against a random subset of at most this many genes (presumably - verify against GeneSet.getRandomSubset)
+			int number = 100000;
+			GeneSet genespart = new GeneSet(ucscgenes.getRandomSubset(number));
+			genespart.readNaSequences(Configurator.getProperty("chromPath"));
+
+			Validator.validate(mspeps, genespart);
+		}
+		//***
+
+		info.stop();
+	}
+
+	// Parses the command-line parameters and loads the config file.
+	//   no argument:   prints the help
+	//   one argument:  a mode switch (-g, -c, -cg, -d, -h and their long forms) with the default
+	//                  config file of that mode, or a peptide file used with "ipig.conf"
+	//   two arguments: like one argument, but the second one names the config file
+	// For the gui modes with one argument a missing default config file is tolerated.
+	// A peptide file is registered via the system properties "msPepFile" and "msPepSetName".
+	// Throws an ExitException for unknown switches, missing files or a wrong number of parameters.
+	private static void parseArguments(String[] args) throws ExitException{
+		switch (args.length){
+		case 0:
+			help();
+			break;
+		case 1:
+			if (args[0].matches("-g") || args[0].matches("-gui")){
+				try {
+					loadConf("ipig.conf");
+				}catch(ExitException e){	 // conf file not absolutely necessary for gui
+					System.out.print("no configuration loaded");
+				}
+				Configurator.setSysProperty("gui", "true");
+			}
+			else if (args[0].matches("-c") || args[0].matches("-control")){
+				loadConf("gc.conf");
+				Configurator.setSysProperty("control", "true");
+			}
+			else if (args[0].matches("-cg") || args[0].matches("-controlgui")){
+				try {
+					loadConf("gc.conf");
+				}catch(ExitException e){	// conf file not absolutely necessary for gui
+					System.out.print("no configuration loaded");
+				}  
+				Configurator.setSysProperty("controlgui", "true");
+			}
+			else if (args[0].matches("-d") || args[0].matches("-downloader")){
+				try {
+					loadConf("dl.conf");
+				}catch(ExitException e){	// conf file not absolutely necessary for gui
+					System.out.print("no configuration loaded");
+				}  
+				Configurator.setSysProperty("downloader", "true");
+			}
+			else if (args[0].matches("-h") || args[0].matches("-help"))
+				help();
+			else if (args[0].startsWith("-")){
+				throw new ExitException("error: unknown paramerer (" + args[0] + ")");
+			}
+			else{
+				File pepfile = new File(args[0]);
+				checkFile(pepfile);
+				loadConf("ipig.conf");
+				registerPeptideFile(pepfile);
+			}
+			break;
+		case 2:
+			loadConf(args[1]);
+			if (args[0].matches("-g") || args[0].matches("-gui"))
+				Configurator.setSysProperty("gui", "true");
+			else if (args[0].matches("-c") || args[0].matches("-control"))
+				Configurator.setSysProperty("control", "true");
+			else if (args[0].matches("-cg") || args[0].matches("-controlgui"))
+				Configurator.setSysProperty("controlgui", "true");
+			else if (args[0].matches("-d") || args[0].matches("-downloader")){
+				Configurator.setSysProperty("downloader", "true");
+			}
+			else if (args[0].matches("-h") || args[0].matches("-help"))
+				help();
+			else if (args[0].startsWith("-")){
+				throw new ExitException("error: wrong paramerer (" + args[0] + ")");
+			}
+			else{
+				File pepfile = new File(args[0]);
+				checkFile(pepfile);
+				registerPeptideFile(pepfile);
+			}
+			break;
+		default:
+			throw new ExitException("error: wrong number of parameters");
+		}
+	}
+
+	// Registers the peptide input file: its absolute path as "msPepFile" and its name without
+	// the extension as "msPepSetName". A name without any dot is used as it is
+	// (cutting at lastIndexOf(".") == -1 would throw a StringIndexOutOfBoundsException).
+	private static void registerPeptideFile(File pepfile) throws ExitException{
+		String fileName = pepfile.getName();
+		int dot = fileName.lastIndexOf(".");
+		String setName = (dot >= 0) ? fileName.substring(0, dot) : fileName;
+		Configurator.setSysProperty("msPepFile", pepfile.getAbsolutePath());
+		Configurator.setSysProperty("msPepSetName", setName);
+	}
+
+	// Loads the properties of a config file into the Configurator;
+	// the file has to exist, otherwise checkFile throws an ExitException.
+	private static void loadConf(String filename) throws ExitException{
+		final File configuration = new File(filename);
+		checkFile(configuration);
+
+		final String absolutePath = configuration.getAbsolutePath();
+		Configurator.loadProperties(absolutePath);
+	}
+
+	// Prints the help file (class path resource "/help/ipig_help.txt") to standard out and then
+	// always throws an ExitException with an empty message, which ends the program run.
+	// A missing resource or a read error is reported on standard out instead of the help text.
+	private static void help() throws ExitException{
+		String help = "/help/ipig_help.txt";
+		InputStream is = Ipig.class.getResourceAsStream(help);
+		if (is == null){
+			// getResourceAsStream returns null for unknown resources; wrapping null would fail with a NullPointerException
+			System.out.println("error: can't find help file (" + help + ")");
+		}
+		else{
+			BufferedReader helpbf = new BufferedReader(new InputStreamReader(is));
+			try {
+				// read until the end of the stream; ready() only tells whether a read would block
+				String line;
+				while ((line = helpbf.readLine()) != null){
+					System.out.println(line);
+				}
+			} catch (IOException e) {
+				System.out.println("error: " + e.getLocalizedMessage());
+			} finally {
+				try {
+					helpbf.close();
+				} catch (IOException ignored) {
+					// the help text is already printed (or the error reported), nothing left to do
+				}
+			}
+		}
+		throw new ExitException("");
+	}
+
+	// Checks the configuration before ipig starts:
+	// mandatory keys have to be present (all missing ones are reported in one ExitException),
+	// missing or empty optional id mapping entries just switch off the corresponding step
+	// (idmapfile1/idmapfile2), an empty outputPath entry is removed, and finally every property
+	// whose key contains "File" has to name an existing file and every property whose key
+	// contains "Path" an existing directory.
+	private static void checkFiles() throws ExitException{
+		Properties props = Configurator.getProperties();
+		StringBuffer message = new StringBuffer();
+		boolean failed = false;
+
+		// mandatory inputs
+		String[] mandatoryKeys = {"msPepFile", "geneAnnoFile", "geneAaSeqFile"};
+		String[] complaints = {
+				"no ms peptide input file specified",
+				"no gene annotation file specified",
+				"no gene aa-sequence file specified"};
+		for (int k=0; k<mandatoryKeys.length; k++){
+			if (props.containsKey(mandatoryKeys[k])){
+				continue;
+			}
+			if (failed) message.append("\n");
+			failed = true;
+			message.append(complaints[k]);
+		}
+
+		// optional id mapping file: an empty entry counts as missing and is removed
+		boolean mapFileGiven = props.containsKey("protMapFile");
+		if (mapFileGiven && props.getProperty("protMapFile").matches("")){
+			Configurator.removeProperty("protMapFile");
+			mapFileGiven = false;
+		}
+		if (!mapFileGiven){
+			idmapfile1 = false;
+			System.out.println("no id mapping file specified\t -> continue without");
+		}
+
+		// optional protein fasta file: an empty entry counts as missing and is removed
+		boolean fastaGiven = props.containsKey("protSeqFile");
+		if (fastaGiven && props.getProperty("protSeqFile").matches("")){
+			Configurator.removeProperty("protSeqFile");
+			fastaGiven = false;
+		}
+		if (!fastaGiven){
+			idmapfile2 = false;
+			System.out.println("no protein fasta file specified\t -> continue without");
+		}
+
+		// the chromosome path is only needed for the validation
+		boolean validationRequested = props.getProperty("validation", "false").matches("true");
+		if (validationRequested && !props.containsKey("chromPath")){
+			if (failed) message.append("\n");
+			failed = true;
+			message.append("no chromosome path specified");
+		}
+
+		boolean emptyOutputPath = props.containsKey("outputPath") && props.getProperty("outputPath").matches("");
+		if (emptyOutputPath){
+			Configurator.removeProperty("outputPath");
+		}
+
+		if (failed){
+			throw new ExitException(message.toString());
+		}
+
+		for (Entry<Object, Object> entry : Configurator.getProperties().entrySet()){
+			String key = (String) entry.getKey();
+			if (key.contains("File")){
+				checkFile(new File((String) entry.getValue()));
+			}
+			if (key.contains("Path")){
+				File dir = new File((String) entry.getValue());
+				if (!dir.isDirectory()){
+					throw new ExitException("error: can't find path (" + dir.getAbsolutePath() + ")");
+				}
+			}
+		}
+	}
+
+	// Makes sure that the given path denotes an existing regular file,
+	// otherwise an ExitException naming the absolute path is thrown.
+	private static void checkFile(File test) throws ExitException{
+		if (test.isFile()){
+			return;
+		}
+		throw new ExitException("error: can't find file (" + test.getAbsolutePath() + ")");
+	}
+}
diff --git a/src/de/rki/ng4/ipig/data/Exporter.java b/src/de/rki/ng4/ipig/data/Exporter.java
new file mode 100644
index 0000000..eac7ab3
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/Exporter.java
@@ -0,0 +1,446 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Vector;
+
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.mapping.Mapper;
+import de.rki.ng4.ipig.mapping.Position;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+
+/**
+ * <p>The Exporter is in charge of exporting peptides (as a {@link PeptideSet}) into BED or GFF3 files.</p>
+ * 
+ * <p>It uses some user parameters regarding the coloring or categorization of peptides by score,
+ * taken from the config file or the gui, respectively.</p>
+ * 
+ * <p>For more information on the BED or GFF3 format see http://genome.ucsc.edu/goldenPath/help/customTrack.html#BED
+ * or http://www.sequenceontology.org/gff3.shtml, respectively.</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Exporter {
+
+	// user parameters as in a ipig config file or gui, will be loaded in the constructor
+	private int minScore;
+	private int maxScore;
+	private double thresh1;
+	private double thresh2;
+	private String color1;
+	private String color2;
+	private String color3;
+
+	/**
+	 * The Constructor reads the necessary user parameters (score bounds, thresholds and colors)
+	 * from the Configurator and validates them.
+	 * 
+	 * <p>The values have to fulfill {@code minScore < threshold1 < threshold2 < maxScore} and each color
+	 * has to pass checkColor(). Every violation is written to the log file and reported as an ExitException.</p>
+	 * 
+	 * @throws ExitException if a user parameter is missing, can't be parsed or violates the relation above
+	 */
+	public Exporter() throws ExitException{
+		try{
+			minScore = Integer.parseInt(Configurator.getProperty("minScore"));
+			maxScore = Integer.parseInt(Configurator.getProperty("maxScore"));
+			if (minScore >= maxScore){
+				throw new NumberFormatException("minScore >= maxScore");
+			}
+		}
+		catch (NumberFormatException e){
+			throw configurationError("error:\tunsuitable score bounds (" + e.getMessage() + ")");
+		}
+
+		try{
+			String threshold1 = Configurator.getProperty("threshold1");
+			String threshold2 = Configurator.getProperty("threshold2");
+			// Unlike Integer.parseInt, Double.parseDouble answers null with a NullPointerException,
+			// which would bypass the handler below. Presumably getProperty returns null for an
+			// unset property (checkColor guards against that as well).
+			if (threshold1 == null || threshold2 == null){
+				throw new NumberFormatException("threshold1 and threshold2 must be set");
+			}
+			thresh1 = Double.parseDouble(threshold1);
+			thresh2 = Double.parseDouble(threshold2);
+			if (!((minScore < thresh1) && (thresh1 < thresh2) && (thresh2 < maxScore))){
+				throw new NumberFormatException("relation needed: minScore < threshold1 < threshold2 < maxScore");
+			}
+		}
+		catch (NumberFormatException e){
+			throw configurationError("error:\tunsuitable thresholds (" + e.getMessage() + ")");
+		}
+
+		try{
+			color1 = Configurator.getProperty("color1");
+			color2 = Configurator.getProperty("color2");
+			color3 = Configurator.getProperty("color3");
+			checkColor(color1);
+			checkColor(color2);
+			checkColor(color3);
+		}
+		catch (NumberFormatException e){
+			throw configurationError("error:\tunsuitable colors (" + e.getMessage() + ")");
+		}
+	}
+
+	/*
+	 * Writes the message to the log file of the current peptide set (or ipig.log)
+	 * and returns the ExitException the constructor has to throw.
+	 */
+	private static ExitException configurationError(String message){
+		Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+		return new ExitException(message);
+	}
+	
+	/*
+	 * Checks if color is in the correct format and range, which is exactly three comma separated
+	 * integers, each from 0 up to 255 (e.g. 000,000,000 up to 255,255,255).
+	 * Throws a NumberFormatException naming the complete color value otherwise.
+	 */
+	private void checkColor (String color){
+		if (color == null)
+			throw new NumberFormatException("incorrect rgb value: " + color);
+		// limit -1 keeps trailing empty fields, so a value like "1,2,3," is rejected
+		// instead of being cut down to three fields and accepted
+		String[] splits = color.split(",", -1);
+		if (splits.length != 3){
+			throw new NumberFormatException("incorrect rgb value: " + color);
+		}
+		for (String split : splits){
+			int value;
+			try{
+				value = Integer.parseInt(split);
+			}
+			catch (NumberFormatException e){
+				// report the complete color like the other checks, not only the unparsable part
+				throw new NumberFormatException("incorrect rgb value: " + color);
+			}
+			if (value < 0 || value > 255){
+				throw new NumberFormatException("incorrect rgb value: " + color);
+			}
+		}
+	}
+
+	/**
+	 * <p>This function writes a set of peptides into bed formatted files.</p>
+	 * 
+	 * <p>A peptide corresponds to a line in bed format (http://genome.ucsc.edu/goldenPath/help/customTrack.html#BED).</p>
+	 * 
+	 * <p>Annotation mapped peptides are written to the file ending with "_annomapped.bed", alternative mapped
+	 * peptides to the file ending with "_altmapped.bed". Each file has separate tracks for the
+	 * uniqueness (unique, non-unique, unmarked), so there are at most 3*2 = 6 tracks.
+	 * Unmapped peptides won't be exported.</p>
+	 * 
+	 * <p>Modifications to the format are as follows:</p>
+	 * 
+	 * <p>The score is the peptide's original score, so it's not scaled between 0 and 1000.
+	 * For coloring purposes each track uses the parameter itemRgb="On" and each peptide line
+	 * provides an rgb color which depends on the score and color settings in the configuration.</p>
+	 * 
+	 * <p>An IOException while writing is only reported on the console; the export is not aborted by it.</p>
+	 * 
+	 * @param pepset	A PeptideSet with the peptides to be exported
+	 * @param outputPath	Prefix of the output files; it is concatenated with the name as is, without adding a separator
+	 * @param msPepSetName	Name of the peptide set, used in the track names and as part of the filenames
+	 * @throws ExitException
+	 */
+	public void bed(PeptideSet pepset, String outputPath, String msPepSetName) throws ExitException{
+		Info info = new Info("write bed");
+		int count = 0; 
+
+		try {
+			// one buffer per output file, the tracks are appended in the order unique, nonunique, unmarked
+			StringBuffer annoTracks = new StringBuffer();
+			StringBuffer altTracks = new StringBuffer();
+
+			count += appendBedTracks(pepset, "unique", "u,", "unique", msPepSetName, annoTracks, altTracks);
+			count += appendBedTracks(pepset, "nonunique", "n,", "nonunique", msPepSetName, annoTracks, altTracks);
+			count += appendBedTracks(pepset, "rest", "", "unmarked", msPepSetName, annoTracks, altTracks);
+
+			writeBedFile(new File(outputPath + msPepSetName + "_annomapped.bed"), annoTracks.toString());
+			writeBedFile(new File(outputPath + msPepSetName + "_altmapped.bed"), altTracks.toString());
+		} catch (IOException e) {
+			System.out.println(e.getMessage());
+		}
+
+		info.stop(count, pepset.size());
+	}
+
+	/*
+	 * Appends the bed lines of all gene mapped peptides of one uniqueness group to the buffer of
+	 * their mapping method. A track line precedes the first peptide of the group in each buffer.
+	 * The header state is kept in flags, so the (growing) buffers never have to be converted
+	 * to strings just to test if they are empty.
+	 * Returns the number of gene mapped peptides of the group.
+	 */
+	private int appendBedTracks(PeptideSet pepset, String group, String shortTag, String longTag, String msPepSetName,
+			StringBuffer annoTracks, StringBuffer altTracks) throws ExitException, IOException{
+		Vector<Integer> pepidx = pepset.getUnique(group);
+		boolean annoStarted = false;
+		boolean altStarted = false;
+		int mapped = 0;
+
+		for (int p=0; p<pepidx.size(); p++){
+			Peptide pep = pepset.get(pepidx.get(p));
+			if (pep.isGeneMapped()){
+				if (pep.getMatchMethod().matches(Mapper.ANNOTATION)){
+					if (!annoStarted){
+						annoTracks.append("track name=\"" + msPepSetName + " (" + shortTag + "anno)\" description=\"" + msPepSetName + " (" + longTag + ", annotation mapped)\" visibility=full itemRgb=On\n");
+						annoStarted = true;
+					}
+					annoTracks.append(bedFeature(pep) + "\n");
+				}
+				if (pep.getMatchMethod().matches(Mapper.ALTERNATIVE)){
+					if (!altStarted){
+						altTracks.append("track name=\"" + msPepSetName + " (" + shortTag + "alt)\" description=\"" + msPepSetName + " (" + longTag + ", alternative mapped)\" visibility=full itemRgb=On\n");
+						altStarted = true;
+					}
+					altTracks.append(bedFeature(pep) + "\n");
+				}
+				mapped++;
+			}
+			Configurator.checkBreak();
+		}
+
+		return mapped;
+	}
+
+	/*
+	 * Writes the content into the file and closes the file in any case, also if writing fails.
+	 */
+	private static void writeBedFile(File target, String content) throws IOException{
+		BufferedWriter writer = new BufferedWriter (new FileWriter(target));
+		try{
+			writer.write(content);
+		}
+		finally{
+			writer.close();
+		}
+	}
+
+	/*
+	 * Building a string representation of a peptide as in bed format (http://genome.ucsc.edu/goldenPath/help/customTrack.html#BED),
+	 * one line per position of the peptide. Modifications to the format are as written in the function bed().
+	 * The lines are separated by line breaks, the last line has no trailing line break.
+	 * Returns an empty string if the peptide has no positions.
+	 */
+	private String bedFeature(Peptide pep){
+		StringBuffer line = new StringBuffer();
+
+		for (Position pos : pep.getPositions()){
+
+			// Line breaks are only set between positions. Cutting off a trailing line break
+			// afterwards failed with a StringIndexOutOfBoundsException if there was no position at all.
+			if (line.length() > 0)
+				line.append("\n");
+
+			// chrom, chromStart, chromEnd
+			line.append(pos.getChrom() + "\t");
+			line.append(pos.getStartPos().firstElement() + "\t");
+			line.append(pos.getEndPos().lastElement() + "\t");
+
+			// name: the sequence is written reversed for every strand other than '+'
+			if (pos.getStrand() == '+')
+				line.append(pep.getPep_query() + pos.getModifier() + "_" + pep.getSequence() 
+						+ "_z=" + pep.getPep_exp_z() + "_sh=" + pep.getPositions().size() + "_>" + "\t");
+			else
+				line.append(pep.getPep_query() + pos.getModifier() + "_" + new StringBuffer(pep.getSequence()).reverse().toString() 
+						+ "_z=" + pep.getPep_exp_z() + "_sh=" + pep.getPositions().size() + "_<" + "\t");
+
+			// score, strand, thickStart, thickEnd (the thick part covers the complete feature)
+			line.append(pep.getPep_Score() + "\t");
+			line.append(pos.getStrand() + "\t");
+			line.append(pos.getStartPos().firstElement() + "\t");
+			line.append(pos.getEndPos().lastElement() + "\t");
+
+			// itemRgb
+			line.append(bedColor(pep) + "\t");
+
+			// blockCount, blockSizes and blockStarts (relative to the first start)
+			line.append(pos.getStartPos().size() + "\t");
+			for (int i=0; i<pos.getStartPos().size(); i++){
+				line.append((pos.getEndPos().get(i) - pos.getStartPos().get(i)) + ",");
+			}
+			line.append("\t");
+			for (int i=0; i<pos.getStartPos().size(); i++){
+				line.append((pos.getStartPos().get(i) - pos.getStartPos().firstElement()) + ",");
+			}
+		}
+
+		return line.toString();
+	}
+
+	/*
+	 * Determines the rgb string for a peptide's bed string, depending on the score and color settings in the configuration.
+	 */
+	private String bedColor(Peptide pep){
+		double score = 0;
+		try{
+			score = Double.parseDouble(pep.getPep_Score());
+		}
+		catch (NumberFormatException e){
+			String message = "score parsing error:\t" + pep.getSequence() + "\t" + pep.getPep_Score();
+			System.out.println(message);
+			Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+		}
+
+		if (score > thresh2) return color3;
+		else if (score > thresh1) return color2;
+		else return color1;
+	}
+
+	/**
+	 * <p>This function writes a set of peptides into gff3 formatted files.</p>
+	 * 
+	 * <p>A peptide is represented as a feature in gff3 format (http://www.sequenceontology.org/gff3.shtml).</p>
+	 * 
+	 * <p>The peptides are separated into two different files ending with "_annomapped.gff3" and "_altmapped.gff3"
+	 *  corresponding to the method the peptides are mapped to the genome (Mapper.ANNOTATION or Mapper.ALTERNATIVE).
+	 *  Unmapped peptides won't be exported.</p>
+	 * 
+	 * <p>Modifications to the format are as follows:</p>
+	 * 
+	 * <p>The SOURCE is "ipig"</p>
+	 * 
+	 * <p>The values in the TYPE column are custom creations, starting with "peptide", followed by a mark for the uniqueness (unique, non-unique, unmarked)
+	 * and a classification for the score in three groups (depending on user parameters threshold1 and threshold2).</p>
+	 * 
+	 * <p>The SCORE is the same as in the input file.</p>
+	 * 
+	 * <p>ATTRIBUTES are:</p>
+	 * <p>1.)	ID, which is the pep_query plus a modifier to make the id unique.
+	 * 		The ID is in charge of grouping parts of the same feature from different lines,
+	 * 		so the importing software should handle those as one element.</p>
+	 * <p>2.)	Name, which is the peptide sequence.
+	 * 		Best way to keep the sequence in this format and to compare it with translations (e.g. in Geneious).</p>
+	 * <p>3.)	z, custom attribute representing the charge the peptide is measured with.</p>
+	 * <p>4.)	shared, custom attribute representing the peptide's frequency of occurrence on different positions (including the current).</p>
+	 * <p>5.)	mods and modpos, custom attributes with the peptide's pep_var_mod and pep_var_mod_pos1 values.</p>
+	 * 
+	 * <p>An IOException while writing is only reported on the console; the export is not aborted by it.</p>
+	 * 
+	 * @param pepset	A PeptideSet with the peptides to be exported
+	 * @param outputPath	Prefix of the output files; it is concatenated with the name as is, without adding a separator
+	 * @param msPepSetName	Name of peptide set, will be used as part of the filename
+	 * @throws ExitException 
+	 */
+	public void gff3(PeptideSet pepset, String outputPath, String msPepSetName) throws ExitException{
+		Info info = new Info("write gff3");
+		int count = 0; 
+
+		BufferedWriter gffBuffer = null;
+		BufferedWriter gffBuffer2 = null;
+		try {
+			gffBuffer = new BufferedWriter (new FileWriter(new File(outputPath + msPepSetName + "_annomapped.gff3")));
+			gffBuffer2 = new BufferedWriter (new FileWriter(new File(outputPath + msPepSetName + "_altmapped.gff3")));
+
+			for (Peptide pep : pepset.getAll()){
+				if (pep.isGeneMapped()){
+					if (pep.getMatchMethod().matches(Mapper.ANNOTATION)){
+						gffBuffer.write(gff3Feature(pep));
+						gffBuffer.newLine();
+					}
+					if (pep.getMatchMethod().matches(Mapper.ALTERNATIVE)){
+						gffBuffer2.write(gff3Feature(pep));
+						gffBuffer2.newLine();
+					}
+					count++;
+				}
+
+				Configurator.checkBreak();
+			}
+		} 
+		catch (IOException e) {
+			System.out.println(e.getMessage());
+		}
+		finally {
+			// Both files are released in any case, also if writing failed, the second file
+			// couldn't be opened or the export was aborted with an ExitException.
+			for (BufferedWriter buffer : new BufferedWriter[]{gffBuffer, gffBuffer2}){
+				if (buffer != null){
+					try {
+						buffer.close();
+					}
+					catch (IOException e) {
+						System.out.println(e.getMessage());
+					}
+				}
+			}
+		}
+
+		info.stop(count, pepset.size());
+	}
+
+	/*
+	 * Building a string representation of a peptide as a feature in gff3 format (http://www.sequenceontology.org/gff3.shtml),
+	 * one line per part of each position. Modifications to the format are as written in the function gff3().
+	 * The lines are separated by line breaks, the last line has no trailing line break.
+	 * Returns an empty string if the peptide has no positions.
+	 */
+	private String gff3Feature(Peptide pep) {
+		/* Example
+		 * chr10	ipig	peptide_non-unique_low	104111702	104111708	31.83	+	2	ID=8b; Name=LSELLR; z=2; 
+		 * chr10	ipig	peptide_non-unique_low	104112215	104112225	31.83	+	0	ID=8b; Name=LSELLR; z=2; 
+		 */
+
+		StringBuffer lines = new StringBuffer();
+
+		for (Position pos : pep.getPositions()){
+
+			Vector<Integer> phases = phases(pos);
+
+			for (int i=0; i<pos.getStartPos().size(); i++){
+				// Line breaks are only set between lines. Cutting off a trailing line break
+				// afterwards failed with a StringIndexOutOfBoundsException if no line was written at all.
+				if (lines.length() > 0)
+					lines.append("\n");
+
+				// seqid and source
+				lines.append(pos.getChrom() + "\t");
+				lines.append("ipig" + "\t");
+
+				// type: uniqueness mark followed by the score rating
+				String uniqueness;
+				if (pep.getPep_isunique().matches("1"))
+					uniqueness = "unique";
+				else if (pep.getPep_isunique().matches("0"))
+					uniqueness = "non-unique";
+				else
+					uniqueness = "unmarked";
+				lines.append("peptide_" + uniqueness + "_" + gff3ScoreRating(pep) + "\t");
+
+				// start and end; NOTE(review): the +1 presumably converts a zero-based start
+				// into the one-based gff3 coordinate - confirm against Position
+				lines.append((pos.getStartPos().get(i) + 1) + "\t");
+				lines.append(pos.getEndPos().get(i) + "\t");
+
+				// score, strand and phase
+				lines.append(pep.getPep_Score() + "\t");
+				lines.append(pos.getStrand() + "\t");
+				lines.append(phases.get(i) + "\t");
+
+				// attributes; the sequence is written reversed for every strand other than '+'
+				lines.append("ID=" + pep.getPep_query() + pos.getModifier() + "; ");
+
+				if (pos.getStrand() == '+')
+					lines.append("Name=" + pep.getSequence() + "; ");
+				else
+					lines.append("Name=" + new StringBuffer(pep.getSequence()).reverse().toString() + "; ");
+
+				lines.append("z=" + pep.getPep_exp_z() + "; ");
+				lines.append("shared=" + pep.getPositions().size() + "; ");
+				lines.append("mods=\"" + pep.getPep_var_mod() + "\"; ");
+				lines.append("modpos=" + pep.getPep_var_mod_pos1() + "; ");
+			}
+		}
+
+		return lines.toString();
+	}
+
+	/*
+	 * Rates a peptide's score as "high" (above threshold2), "mid" (above threshold1) or "low" (everything else).
+	 * A score which can't be parsed is reported (console and log file) and rated like a score of 0.
+	 */
+	private String gff3ScoreRating(Peptide pep){
+		double value = 0;
+		try{
+			value = Double.parseDouble(pep.getPep_Score());
+		}
+		catch (NumberFormatException e){
+			String note = "score parsing error:\t" + pep.getSequence() + "\t" + pep.getPep_Score();
+			System.out.println(note);
+			Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", note);
+		}
+
+		if (value > thresh2){
+			return "high";
+		}
+		if (value > thresh1){
+			return "mid";
+		}
+		return "low";
+	}
+
+	/*
+	 * Calculates the phase of each part of the peptide's position (e.g. if separated by an intron).
+	 * The part lengths are summed up, starting with the last part on strand '+' and with the first
+	 * part otherwise; the phase of a part is the sum including this part modulo 3.
+	 * So with a total length divisible by 3 the first (strand '+') resp. last part gets phase 0,
+	 * the other parts might have phase 1 or 2 if a codon is split by an intron.
+	 * 
+	 * See gff3 format description (http://www.sequenceontology.org/gff3.shtml) for more details regarding directions. 
+	 */
+	private Vector<Integer> phases(Position pos){
+		int parts = pos.getStartPos().size();
+		boolean backwards = pos.getStrand() == '+';
+
+		Vector<Integer> result = new Vector<Integer>();
+		result.setSize(parts);
+
+		int covered = 0;
+		for (int step=0; step<parts; step++){
+			// strand '+' walks from the last part to the first, any other strand from the first to the last
+			int part = backwards ? parts - 1 - step : step;
+			covered += pos.getEndPos().get(part) - pos.getStartPos().get(part);
+			result.set(part, covered % 3);
+		}
+
+		return result;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/data/Gene.java b/src/de/rki/ng4/ipig/data/Gene.java
new file mode 100644
index 0000000..8c5112e
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/Gene.java
@@ -0,0 +1,603 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import de.rki.ng4.ipig.exceptions.FormatException;
+
+/**
+ * <p>Gene is a container for the diverse information on a gene as provided by the UCSC Table Browser.</p>
+ * 
+ * <p>The import of UCSC annotations should be done with a {@link GeneSet}, which provides reading methods 
+ * and creates a Gene object for every gene (i.e. line) in an imported annotation file.</p>
+ * 
+ * <p>Please note: Coordinates (i.e. positions) provided by UCSC are zero-based and half-open.
+ * This means gene positions are indexed like, for example, Java arrays from 0 to length-1.</p>
+ * 
+ * <p>The member variables correspond to the UCSC table schema for the track "UCSC Genes" with table "knownGene"
+ * or the track "Ensembl Genes" with table "ensGene",
+ * e.g. with clade "Mammal", genome "Human" and group "Genes and Gene Prediction Tracks" (
+ * <a href="http://genome.ucsc.edu/cgi-bin/hgTables?org=Human&db=hg19&hgsid=205072007&hgta_doMainPage=1">
+ * UCSC Table Browser</a>).</p>
+ *
+ * @author Mathias Kuhring
+ */
+
+/* 
+ * UCSC Schema for UCSC Genes
+ * field		example				SQL type 			description
+ * name 		uc001aaa.3			varchar(255) 		Name of gene
+ * chrom 		chr1				varchar(255) 		Reference sequence chromosome or scaffold
+ * strand 		+					char(1) 			+ or - for strand
+ * txStart 		11873				int(10) unsigned 	Transcription start position
+ * txEnd 		14409				int(10) unsigned 	Transcription end position
+ * cdsStart 	11873				int(10) unsigned 	Coding region start
+ * cdsEnd 		11873				int(10) unsigned 	Coding region end
+ * exonCount 	3					int(10) unsigned 	Number of exons
+ * exonStarts 	11873,12612,13220,	longblob 	  		Exon start positions
+ * exonEnds 	12227,12721,14409,	longblob 	  		Exon end positions
+ * proteinID  						varchar(40) 		UniProt display ID for Known Genes, UniProt accession or RefSeq protein ID for UCSC Genes
+ * alignID 		uc001aaa.3			varchar(255) 		Unique identifier for each (known gene, alignment position) pair
+ * 
+ * UCSC Schema for Ensembl Genes
+ * field			example			SQL type 				info 	description
+ * bin 				585				smallint(5) unsigned 	range 	Indexing field to speed chromosome range queries.
+ * name 			ENST00000456328	varchar(255) 			values 	Name of gene
+ * chrom 			chr1			varchar(255) 			values 	Reference sequence chromosome or scaffold
+ * strand 			+				char(1) 				values 	+ or - for strand
+ * txStart 			11868			int(10) unsigned 		range 	Transcription start position
+ * txEnd 			14409			int(10) unsigned 		range 	Transcription end position
+ * cdsStart 		14409			int(10) unsigned 		range 	Coding region start
+ * cdsEnd 			14409			int(10) unsigned 		range 	Coding region end
+ * exonCount 		3				int(10) unsigned 		range 	Number of exons
+ * exonStarts 		11868,12612,13220,	longblob 	  				Exon start positions
+ * exonEnds 		12227,12721,14409,	longblob 	  				Exon end positions
+ * score 			0				int(11) 				range 	 
+ * name2 			ENSG00000223972	varchar(255) 			values 	 
+ * cdsStartStat 	none			enum('none', 'unk', 'incmpl', 'cmpl') 	values 	 
+ * cdsEndStat 		none			enum('none', 'unk', 'incmpl', 'cmpl') 	values 	 
+ * exonFrames 		-1,-1,-1,		longblob 	  	 
+ */
+public class Gene{
+
+	// gene source marker
+	public static final String UCSC = "ucsc";
+	public static final String ENSEMBL = "ensembl";
+	
+	// if strict == true, imported genes are filtered for non-codings and uncommon proteins
+	// strict can be set with setStrict(), e.g. useful for GeneControl, so it can check this stuff itself
+	private static boolean strict = true;
+
+	// marks the gene source, either UCSC or ENSEMBLE
+	private String source;
+
+	// gene annotation (ucsc & ensemble)
+	private String name;
+	private String chrom;
+	private char strand;
+	private int txStart;
+	private int txEnd;
+	private int cdsStart;
+	private int cdsEnd;
+	private int exonCount;
+	private int exonStarts[];
+	private int exonEnds[];
+
+	// gene annotation (ucsc)
+	private String proteinID;
+	private String alignID;
+
+	// gene annotation (ensemble)
+	private int bin;
+	private int score;
+	private String name2;
+	private String cdsStartStat;
+	private String cdsEndStat;
+	private String exonFrames;
+
+	// sequences
+	private String sequence;
+	private String aaSequence;
+
+	// marks if a gene was used, that is if any peptide was mapped to this gene
+	private boolean used;
+
+	/**
+	 * Gene is initialized by processing of an UCSC annotation file row to set the member variables
+	 * 
+	 * @param row String containing one gene's annotations, so one line of the UCSC annotation file.
+	 * @throws FormatException 
+	 */
+	public Gene(String row, String source) throws FormatException{
+		this.source = source;
+
+		String[] splits = row.split("\t");
+		checkRow(splits);
+		parseRow(splits);
+	}
+
+	/*
+	 * Checks if the row fits the expected format, either an UCSC or Ensembl gene.
+	 * If strict is set, genes on uncommon chroms (e.g. chr6_qbl_hap6) and non-protein-coding genes
+	 * (cdsStart equals cdsEnd) are rejected. All fields are tested with regular expressions.
+	 * Rows with too few columns and unknown (or missing) sources are rejected with a FormatException
+	 * as well, instead of failing with an unchecked exception.
+	 */
+	private void checkRow(String[] splits) throws FormatException{
+		if (UCSC.equals(source)){
+			/* Example:
+			 * #name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID	alignID
+			 * uc001aaa.3	chr1	+	11873	14409	11873	11873	3	11873,12612,13220,	12227,12721,14409,		uc001aaa.3
+			 * uc010nxq.1	chr1	+	11873	14409	12189	13639	3	11873,12594,13402,	12227,12721,14409,	B7ZGX9	uc010nxq.1
+			 * uc001aut.1	chr1	-	13328195	13331692	13328832	13331671	3	13328195,13330413,13331378,	13329406,13330992,13331692,	NP_001094101	uc001aut.1
+			 * uc011ihh.1	chr6_qbl_hap6	-	1315373	1321962	1317738	1318302	3	1315373,1318888,1321856,	1318491,1318964,1321962,	Q2KJ03	uc011ihh.1
+			 */
+			String gene = splits.length > 0 ? splits[0] : "";
+			// the strict checks below read the columns up to cdsEnd (index 6)
+			if (splits.length < 7){
+				throw new FormatException("incorrect annotation row: " + gene);
+			}
+			if (strict && !splits[1].matches("chr(\\d+|X|Y|M|[IVX]+)")){
+				throw new FormatException("uncommon chromosome: " + splits[0] + " " + splits[1]);
+			}
+			// plain comparison, cdsEnd is data and must not be interpreted as a regular expression
+			if (strict && splits[5].equals(splits[6])){
+				throw new FormatException("non-protein-coding gene: " + splits[0]);	
+			}
+			if (!isUcscLine(splits)){
+				throw new FormatException("incorrect annotation row: " + splits[0]);
+			}
+			
+		}
+		else if (ENSEMBL.equals(source)){
+			/* Example
+			 * #bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames
+			 * 0	ENST00000424320	chr1	+	66999297	67145425	67000041	67145425	13	66999297,66999928,67091529,67098752,67101626,67105459,67108492,67109226,67136677,67137626,67138963,67142686,67145360,	66999355,67000051,67091593,67098777,67101698,67105516,67108547,67109402,67136702,67137678,67139049,67142779,67145425,	0	ENSG00000118473	cmpl	incmpl	-1,0,1,2,0,0,0,1,0,1,2,1,1,
+			 * 
+			 * #bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames
+			 * 73	YAL012W	chrI	+	130798	131983	130798	131983	1	130798,	131983,	0	YAL012W	cmpl	cmpl	0,
+			 */
+			String gene = splits.length > 1 ? splits[1] : "";
+			// the strict checks below read the columns up to cdsEnd (index 7)
+			if (splits.length < 8){
+				throw new FormatException("incorrect annotation row: " + gene);
+			}
+			if (strict && !splits[2].matches("chr(\\d+|X|Y|M|[IVX]+)")){
+				throw new FormatException("unusable chromosome: " + splits[1] + " " + splits[2]);
+			}
+			// plain comparison, cdsEnd is data and must not be interpreted as a regular expression
+			if (strict && splits[6].equals(splits[7])){
+				throw new FormatException("non-protein-coding gene: " + splits[1]);	
+			}
+			if (!isEnsemblLine(splits)){
+				throw new FormatException("incorrect annotation row: " + splits[1]);
+			}
+			
+		}
+		else
+			throw new FormatException("unusable source: " + source);
+	}
+	
+	/**
+	 * Tests if the fields of an annotation row fit the UCSC gene format, which means exactly 12 fields
+	 * (name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, proteinID, alignID),
+	 * each matching the regular expression expected for its column.
+	 * 
+	 * @param splits The fields of one annotation row
+	 * @return true if the number of fields and every single field fit, else false
+	 */
+	public static boolean isUcscLine(String[] splits){
+		final String[] expected = {
+				"\\w+\\.\\d", "\\w+",
+				"[\\+\\-]", "\\d+",
+				"\\d+", "\\d+",
+				"\\d+", "\\d+",
+				"(\\d+,)++", "(\\d+,)++",
+				"[\\w\\-]*", "\\w+\\.\\d"
+		};
+
+		if (splits.length != expected.length){
+			return false;
+		}
+		for (int column=0; column<expected.length; column++){
+			if (!splits[column].matches(expected[column])){
+				return false;
+			}
+		}
+		return true;
+	}
+	
+	public static boolean isEnsemblLine(String[] splits){
+		return splits.length == 16 &&
+				splits[0].matches("\\d+") && splits[1].matches("[\\w\\-\\(\\)]+") && splits[2].matches("\\w+") &&
+				splits[3].matches("[\\+\\-]") && splits[4].matches("\\d+") &&
+				splits[5].matches("\\d+") && splits[6].matches("\\d+") &&
+				splits[7].matches("\\d+") && splits[8].matches("\\d+") &&
+				splits[9].matches("(\\d+,)++") && splits[10].matches("(\\d+,)++") &&
+				splits[11].matches("\\d+") && splits[12].matches("[\\w\\-\\(\\)]+") &&
+				splits[13].matches("\\w+") && splits[14].matches("\\w+") &&
+				splits[15].matches("(-?\\d+,)+");
+	}
+
+	/*
+	 * Parses the annotation fields and sets the corresponding variables
+	 */
+	private void parseRow(String[] splits){	
+		if (source.matches(UCSC)){
+			// #name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID	alignID
+			int index = 0;
+			
+			name = splits[index++];
+			chrom = splits[index++];
+			strand = splits[index++].charAt(0);
+			txStart = Integer.parseInt(splits[index++]);
+			txEnd = Integer.parseInt(splits[index++]);
+			cdsStart = Integer.parseInt(splits[index++]);
+			cdsEnd = Integer.parseInt(splits[index++]);
+			exonCount = Integer.parseInt(splits[index++]);
+
+			exonStarts = new int[exonCount];
+			exonEnds = new int[exonCount];
+			String starts[] = splits[index++].split(",");
+			String ends[] = splits[index++].split(",");
+			for (int i=0; i<exonCount; i++){
+				exonStarts[i] = Integer.parseInt(starts[i]);
+				exonEnds[i] = Integer.parseInt(ends[i]);
+			}
+
+			// import protein id without isoform number (in case of UniProtKB-AC) -> split("-")
+			proteinID = splits[index++].split("-")[0];
+			alignID = splits[index++];
+		}
+		
+		if (source.matches(ENSEMBL)){
+			// #bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames
+			int index = 0;
+			
+			bin = Integer.parseInt(splits[index++]);
+			name = splits[index++];
+			chrom = splits[index++];
+			strand = splits[index++].charAt(0);
+			txStart = Integer.parseInt(splits[index++]);
+			txEnd = Integer.parseInt(splits[index++]);
+			cdsStart = Integer.parseInt(splits[index++]);
+			cdsEnd = Integer.parseInt(splits[index++]);
+			exonCount = Integer.parseInt(splits[index++]);
+
+			exonStarts = new int[exonCount];
+			exonEnds = new int[exonCount];
+			String starts[] = splits[index++].split(",");
+			String ends[] = splits[index++].split(",");
+			for (int i=0; i<exonCount; i++){
+				exonStarts[i] = Integer.parseInt(starts[i]);
+				exonEnds[i] = Integer.parseInt(ends[i]);
+			}
+			
+			score = Integer.parseInt(splits[index++]);
+			name2 = splits[index++];
+			cdsStartStat = splits[index++];
+			cdsEndStat = splits[index++];
+			exonFrames = splits[index++];
+		}
+	}
+
+	/**
+	 * Returns a String aggregating all annotations in one tab separated line (row), with the columns
+	 * of the UCSC format if the source is UCSC and the columns of the Ensembl format otherwise.
+	 * 
+	 * @see java.lang.Object#toString()
+	 * @return Annotations in a one line String
+	 */
+	public String toString(){
+		final boolean ucscRow = source.matches(UCSC);
+		StringBuilder row = new StringBuilder();
+
+		// only Ensembl rows start with the bin column
+		if (!ucscRow){
+			row.append(bin).append("\t");
+		}
+
+		// columns shared by both formats
+		row.append(name).append("\t");
+		row.append(chrom).append("\t");
+		row.append(strand).append("\t");
+		row.append(txStart).append("\t");
+		row.append(txEnd).append("\t");
+		row.append(cdsStart).append("\t");
+		row.append(cdsEnd).append("\t");
+		row.append(exonCount).append("\t");
+		row.append(exons()).append("\t");
+
+		if (ucscRow){
+			row.append(proteinID).append("\t").append(alignID);
+		}
+		else{
+			row.append(score).append("\t");
+			row.append(name2).append("\t");
+			row.append(cdsStartStat).append("\t");
+			row.append(cdsEndStat).append("\t");
+			row.append(exonFrames);
+		}
+
+		return row.toString();
+	}
+
+	// Builds the two exon columns for the gene export (e.g. toString()): the comma terminated list
+	// of exon starts, a tab and the comma terminated list of exon ends.
+	private String exons(){
+		StringBuilder columns = new StringBuilder();
+
+		for (int exon=0; exon<exonStarts.length; exon++){
+			columns.append(exonStarts[exon]).append(",");
+		}
+		columns.append("\t");
+		// the number of exon starts sets the number of exon ends to write, too
+		for (int exon=0; exon<exonStarts.length; exon++){
+			columns.append(exonEnds[exon]).append(",");
+		}
+
+		return columns.toString();
+	}
+
+	/**
+	 * Returns the gene's name as read from the name column of the annotation row.
+	 * 
+	 * @return Name of the gene
+	 */
+	public String getName(){
+		return this.name;
+	}
+
+	/**
+	 * Returns the name of the chromosome the gene is located on (e.g. "chr6").
+	 * 
+	 * @return String describing a chromosome
+	 */
+	public String getChrom(){
+		return this.chrom;
+	}
+
+	/**
+	 * Returns the DNA strand the gene is located on, i.e. the orientation of the gene on the chromosome.
+	 * 
+	 * Annotation data is usually provided for one single strand in 5'->3' direction only.
+	 * 
+	 * @return '+' for the 5'->3' or '-' for the 3'->5' direction
+	 */
+	public char getStrand(){
+		return this.strand;
+	}
+
+	/**
+	 * Returns the start position of the transcription (txStart column).
+	 * 
+	 * @return Transcription start position
+	 */
+	public int getTxStart(){
+		return this.txStart;
+	}
+
+	/**
+	 * Returns the end position of the transcription (txEnd column).
+	 * 
+	 * @return Transcription end position
+	 */
+	public int getTxEnd(){
+		return this.txEnd;
+	}
+
+	/**
+	 * Returns the start position of the coding sequence, which is usually the position of the
+	 * start codon on strand '+' and of the stop codon on strand '-'.
+	 * 
+	 * @return Position of CdsStart
+	 */
+	public int getCdsStart(){
+		return this.cdsStart;
+	}
+
+	/**
+	 * Returns the end position of the coding sequence, which is usually the position after the
+	 * stop codon on strand '+' and after the start codon on strand '-'
+	 * (Remember: zero-based and half-open, see {@link Gene}).
+	 * 
+	 * @return Position of CdsEnd
+	 */
+	public int getCdsEnd(){
+		return this.cdsEnd;
+	}
+
+	/**
+	 * Returns the number of exons.
+	 * 
+	 * @return Number of exons
+	 */
+	public int getExonCount(){
+		return exonCount;
+	}
+
+	/**
+	 * Returns all exon start positions.
+	 * 
+	 * <p>The returned array is the gene's own array, not a copy:
+	 * changes made to it by the caller are visible to this gene.</p>
+	 * 
+	 * @return List of exon start positions
+	 */
+	public int[] getExonStarts(){
+		return exonStarts;
+	}
+
+	/**
+	 * Returns all exon end positions.
+	 * (Remember: zero-based and half-open, see {@link Gene}).
+	 * 
+	 * <p>The returned array is the gene's own array, not a copy:
+	 * changes made to it by the caller are visible to this gene.</p>
+	 * 
+	 * @return List of exon end positions
+	 */
+	public int[] getExonEnds(){
+		return exonEnds;
+	}
+
+	/**
+	 * Returns the protein reference (UniProtKB-AC or RefSeq for UCSC, name (corr. Ensembl_TRS) for Ensembl) of the protein the gene encodes.
+	 * 
+	 * <p>The proteinID is returned if the gene's source matches UCSC; for every other source the gene's name is returned.</p>
+	 * 
+	 * @return Protein reference string
+	 */
+	public String getProteinReference(){
+		// NOTE(review): String.matches interprets UCSC as a regular expression; this equals a plain
+		// comparison only as long as the constant contains no regex metacharacters - confirm
+		if (source.matches(UCSC))
+			return proteinID;
+		else // source.matches(ENSEMBL)
+			return name;
+	}
+
+	/**
+	 * <p>Sets the gene's DNA sequence, which must range from the transcription start position (including) to the transcription end position (excluding).</p>
+	 * 
+	 * <p>Should be used carefully because later calculations may depend on the sequence's correctness, especially regarding the correct relation to txStart and txEnd.</p>
+	 * 
+	 * @param sequence The gene's DNA sequence as String
+	 */
+	public void setSequence(String sequence) {
+		this.sequence = sequence;
+	}
+
+	/**
+	 * Returns the gene's DNA sequence as set with {@link Gene#setSequence(String)}, usually located at [txStart,txEnd) in the corresponding chromosome.
+	 * 
+	 * @return The gene's DNA sequence as String if set, else null
+	 */
+	public String getSequence() {
+		return sequence;
+	}
+
+	/**
+	 * <p>Sets the aminoacid sequence this gene is coding for.</p>
+	 * 
+	 * <p>Should be used carefully because later calculations may depend on the sequence's correctness.</p>
+	 * 
+	 * @param aaSequence Aminoacid sequence as String
+	 */
+	public void setAaSequence(String aaSequence) {
+		this.aaSequence = aaSequence;
+	}
+
+	/**
+	 * <p>Returns the aminoacid sequence this gene is coding for, as set with {@link Gene#setAaSequence(String)}.</p>
+	 * 
+	 * @return Aminoacid sequence as String if set, else null
+	 */
+	public String getAaSequence() {
+		return aaSequence;
+	}
+
+	/**
+	 * <p>Returns the gene's coding sequence (CDS), if the DNA sequence is available.
+	 * The exon parts between cdsStart and cdsEnd are cut out of the sequence set with
+	 * {@link Gene#setSequence(String)} and concatenated in ascending exon order, i.e. without introns.</p>
+	 * 
+	 * <p>The sequence is read exactly as stored; no strand-dependent conversion is done in this method.</p>
+	 * 
+	 * @return Coding sequence (CDS) in upper case if the DNA sequence is available, null otherwise;
+	 *         an empty String if no exon reaches the coding region
+	 */
+	public String getCds(){	
+		if (sequence == null)
+			return null;
+
+		StringBuilder dna = new StringBuilder();
+
+		for (int[] segment : codingSegments()){
+			// exon positions are chromosome based, the stored sequence starts at txStart
+			dna.append(getSequence().substring(segment[0] - txStart, segment[1] - txStart));
+		}
+
+		return dna.toString().toUpperCase();
+	}
+
+	/**
+	 * Returns the gene's coding sequence (CDS) length, i.e. the summed length of all exon parts
+	 * between cdsStart and cdsEnd. The DNA sequence is not needed for this.
+	 * 
+	 * @return CDS length, 0 if no exon reaches the coding region
+	 */
+	public int getCdsLength(){	
+		int length = 0;
+
+		for (int[] segment : codingSegments()){
+			length += segment[1] - segment[0];
+		}
+
+		return length;
+	}
+
+	/*
+	 * Collects the exon parts lying inside the coding region as {start, end} pairs in ascending order.
+	 * The first pair starts at cdsStart and the last pair ends at cdsEnd.
+	 * Returns an empty array if there are no exons or none of them reaches the coding region
+	 * (the index loops are bounds checked, so this case no longer runs out of the exon arrays).
+	 * The gene's own exon arrays are not modified.
+	 */
+	private int[][] codingSegments(){
+		int[] starts = getExonStarts();
+		int[] ends = getExonEnds();
+		int min = 0;
+		int max = ends.length;
+
+		// skip exons ending in front of cdsStart
+		while (min < max && ends[min] < getCdsStart()){
+			min++;
+		}
+		// skip exons starting behind cdsEnd
+		while (max > min && starts[max-1] > getCdsEnd()){
+			max--;
+		}
+
+		if (min >= max)
+			return new int[0][];
+
+		int[][] segments = new int[max-min][];
+		for (int i=min; i<max; i++){
+			segments[i-min] = new int[]{starts[i], ends[i]};
+		}
+
+		// clip the outer exons to the coding region
+		segments[0][0] = getCdsStart();
+		segments[segments.length-1][1] = getCdsEnd();
+
+		return segments;
+	}
+
+	/**
+	 * Sets the marker "used", which might be useful for expensive operations like loading sequences and keeping them in memory.
+	 * 
+	 * @param used Set to "true" if used, "false" otherwise
+	 */
+	public void setUsed(boolean used) {
+		this.used = used;
+	}
+
+	/**
+	 * Returns the value of the "used" marker.
+	 * 
+	 * @return true or false, depending on how it's set with {@link #setUsed(boolean used)}
+	 */
+	public boolean isUsed(){
+		return used;
+	}
+
+	/**
+	 * <p>Shifts a cdsStart by a given value, tries to correct txStart and exonStarts near the "new" cdsStart position.
+	 * 
+	 * <p>If a cdsStart ends up in an intron the nearest exonStart will be extended.
+	 * If a cdsStart ends up in an exon (either the original or any other) no exon will be changed.</p>
+	 * 
+	 * <p>In other words, transcription length is not considered to change the same in length as the cds might do,
+	 * except the new cdsStart is inside or in front of the original exon.</p>
+	 * 
+	 * @param shift Size of the shift
+	 */
+	public void shiftCdsStart(int shift) {
+		// cdsStart adjustment
+		cdsStart += shift;
+		
+		// txStart correction
+		if (cdsStart < txStart) txStart = cdsStart;
+		
+		// exonStarts correction
+		for (int i=0; i<exonStarts.length; i++){
+			if (cdsStart < exonStarts[i]){
+				exonStarts[i] = cdsStart;
+				break;
+			}
+			if (exonStarts[i] <= cdsStart && cdsStart <= exonEnds[i]){
+				break;
+			}
+		}
+	}
+
+	/**
+	 * <p>Shifts a cdsEnd by a given value, tries to correct txEnd and exonEnds near the "new" cdsStart position.
+	 * 
+	 * <p>If a cdsEnd ends up in an intron the nearest exonEnd will be extended.
+	 * If a cdsEnd ends up in an exon (either the original or any other) no exon will be changed.</p>
+	 * 
+	 * <p>In other words, transcription length is not considered to change the same in length as the cds might do,
+	 * except the new cdsEnd is inside or behind of the original exon.</p>
+	 * 
+	 * @param shift Size of the shift
+	 */
+	public void shiftCdsEnd(int shift) {
+		// cdsEnd adjustment
+		cdsEnd += shift;
+		
+		// txEnd correction
+		if (cdsEnd > txEnd) txEnd = cdsEnd;
+		
+		// exonEnds correction
+		for (int i=exonEnds.length-1; i>=0; i--){
+			if (cdsEnd > exonEnds[i]){
+				exonEnds[i] = cdsEnd;
+				break;
+			}
+			if (exonStarts[i] < cdsEnd && cdsEnd <= exonEnds[i]){
+				break;
+			}
+		}
+	}
+
+	/**
+	 * Returns the source for this gene annotation, either Gene.UCSC if the gene is an UCSC knownGene, or Gene.ENSEMBL if it is an Ensemble ensGene.
+	 * 
+	 * @return String indication the annotation's origin
+	 */
+	public String getSource() {
+		return source;
+	}
+
+	/**
+	 * Describes the strictness of rejecting imported genes (See {@link #setStrict(boolean)}). 
+	 * 
+	 * @return true, if genes are strictly rejected
+	 */
+	public static boolean isStrict() {
+		return strict;
+	}
+
+	/**
+	 * Set the strictness of rejecting imported genes.
+	 * 
+	 * true : non-coding genes and genes on uncommon chromosomes are rejected.
+	 * false: only genes with unrecognized formats (due to regex) are rejected.
+	 * 
+	 * default is true.
+	 * 
+	 * @param strict
+	 */
+	public static void setStrict(boolean strict) {
+		Gene.strict = strict;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/data/GeneSet.java b/src/de/rki/ng4/ipig/data/GeneSet.java
new file mode 100644
index 0000000..2572a17
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/GeneSet.java
@@ -0,0 +1,674 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.Vector;
+import java.util.regex.Pattern;
+
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.exceptions.FormatException;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+
+/**
+ * GeneSet handles the gene annotation information provided by UCSC table exports (UCSC Genes or Ensembl Genes).
+ * This includes import methods and the creation of a Gene object for each gene imported from a file.
+ * 
+ * @author Mathias Kuhring
+ */
+public class GeneSet{
+
+	// The GeneSet's genes
+	private Vector<Gene> genes;
+
+	// variables to check the input files
+	private final String ucscHeader = "#name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID	alignID";
+	private final String ensembleHeader = "#bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames";
+	boolean ucsc;
+	boolean ensembl;
+
+	/**
+	 * <p>Initialization of a GeneSet by giving a filename of an UCSC gene annotations export file
+	 * (either with track "UCSC Genes" and table "knownGene" or the track "Ensembl Genes" with table "ensGene").</p>
+	 * 
+	 * <p>The version with filter {@link #GeneSet(String, Set)} should be preferred as it might improve the overall performance.</p>
+	 * 
+	 * <p>The file must be tab-separated and start with one of the following headers corresponding to the UCSC table schema:<br>
+	 * knownGene:	"#name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID	alignID"<br>
+	 * ensGene:		"#bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames"</p>
+	 * 
+	 * <p>For further informations on the UCSC table schema see {@link Gene} </p>
+	 * 
+	 * @see #readAnnotations(String filename)
+	 * @param filename Name of an UCSC annotation file.
+	 * @throws ExitException 
+	 */
+	public GeneSet(String filename) throws ExitException{
+		genes = new Vector<Gene>();
+		readAnnotations(filename);
+	}
+
+	/**
+	 * <p>Initialization of a GeneSet by giving a filename of an UCSC gene annotations export file
+	 * (either with track "UCSC Genes" and table "knownGene" or the track "Ensembl Genes" with table "ensGene")
+	 * and a Set of protein IDs (UniProtKB-AC and RefSeq) for filtering.</p>
+	 * 
+	 * <p>The file must be tab-separated and start with one of the following headers corresponding to the UCSC table schema:<br>
+	 * knownGene:	"#name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	proteinID	alignID"<br>
+	 * ensGene:		"#bin	name	chrom	strand	txStart	txEnd	cdsStart	cdsEnd	exonCount	exonStarts	exonEnds	score	name2	cdsStartStat	cdsEndStat	exonFrames"</p>
+	 * 
+	 * <p>For further informations on the UCSC table schema see {@link Gene} </p>
+	 * 
+	 * <p>Only annotations of genes coding for a protein in the filter set will be imported!</p>
+	 * 
+	 * @see #readAnnotations(String, Set)
+	 * @param filename Name of an UCSC annotation file.
+	 * @param idFilter Set of Strings, each corresponding to an Uniprot protein ID
+	 * @throws ExitException if the file can not be read or does not match the expected table format
+	 */
+	public GeneSet(String filename, Set<String> idFilter) throws ExitException{
+		genes = new Vector<Gene>();
+		readAnnotations(filename, idFilter);
+	}
+
+	/**
+	 * <p>Initialization of a GeneSet by giving a Vector with genes.</p>
+	 * 
+	 * <p>The genes are added to this new GeneSet, if they all have the same source (either Gene.UCSC or Gene.ENSEMBL).
+	 * Otherwise, or if the Vector is empty or null, this GeneSet stays empty.</p>
+	 * 
+	 * @param genes A Vector containing genes ({@link Gene})
+	 */
+	public GeneSet(Vector<Gene> genes){
+		this.genes = new Vector<Gene>();
+		// an empty input has no first gene to take the source from, so the set just stays empty
+		if (genes != null && !genes.isEmpty() && sameSource(genes)){
+			this.genes.addAll(genes);
+			String source = genes.get(0).getSource();
+			ucsc = source.matches(Gene.UCSC);
+			ensembl = source.matches(Gene.ENSEMBL);
+		}
+	}
+	
+	/**
+	 * <p>Adds {@link Gene}s from a Vector to this GeneSet if they have the same source as the present genes.</p>
+	 * 
+	 * @param input A Vector containing genes ({@link Gene})
+	 * @return true if genes were added, else false
+	 */
+	public boolean addGenes(Vector<Gene> input){
+		boolean same;
+		if (same = sameSource(input))
+			genes.addAll(input);
+		return same;
+	}
+
+	/*
+	 * Checks if the genes in a vector have all the same source among each other 
+	 * and as the genes in this GeneSet (only if this GeneSet already has a source).
+	 * An empty vector contains no gene with a differing source and therefore yields true.
+	 */
+	private boolean sameSource(Vector<Gene> input){
+		// no first gene to take a reference source from
+		if (input.isEmpty())
+			return true;
+
+		String source;
+		if (ucsc) source = Gene.UCSC;
+		else if (ensembl) source = Gene.ENSEMBL;
+		else source = input.get(0).getSource();
+
+		for (Gene gene : input){
+			if (!gene.getSource().matches(source))
+				return false;
+		}
+
+		return true;
+	}
+
+	/**
+	 * <p>Returns the number of Genes in this GeneSet.</p>
+	 * <p>Together with the {@link #get(int)}-Method it can be used for loops e.g., cause it just calls the size-method of the vector containing the GeneAnnotations</p>
+	 * 
+	 * @return Size of the GeneSet, resp. number of Genes.
+	 */
+	public int size(){
+		return genes.size();
+	}
+
+	/**
+	 * <p>Returns the arg0-th Gene in this GeneSet.</p>
+	 * <p>Together with the {@link #size()}-Method it can be used for loops, cause it just calls the get-method of the vector containing the Genes</p>
+	 * 
+	 * @param arg0 Index of the requested Gene, passed on unchanged to the vector's get-method
+	 * @return arg0-th Gene of the GeneSet.
+	 */
+	public Gene get(int arg0){
+		return genes.get(arg0);
+	}
+
+	/**
+	 * Returns all genes in this GeneSet in a Vector.
+	 * 
+	 * @return Vector with {@link Gene}s
+	 */
+	public Vector<Gene> getAll(){
+		return genes;
+	}
+
+	/*
+	 * Imports a UCSC gene annotation file named with filename; every gene line is imported.
+	 * See also GeneSet(String).
+	 */
+	private void readAnnotations(String filename) throws ExitException{
+		importAnnotations(filename, null,
+				"Error: annotation file doesn't match UCSC table schema with tab seperation (USCS Genes knownGene or Ensembl Genes ensGene)!");
+	}
+
+	/*
+	 * Imports a UCSC gene annotation file named with filename using a filter set of Uniprot protein IDs.
+	 * See also GeneSet(String, Set<String>)
+	 */
+	private void readAnnotations(String filename, Set<String> idFilter) throws ExitException{
+		importAnnotations(filename, idFilter,
+				"error: annotation file doesn't match UCSC table schema with tab seperation (e.g. USCS Genes knownGene or Ensembl Genes ensGene)!");
+	}
+
+	/*
+	 * Shared import routine of both readAnnotations variants.
+	 * 
+	 * The first line decides the format: it is either one of the two known headers, or (header-less file)
+	 * already a gene line, which is then imported as well. Gene lines rejected by the Gene constructor
+	 * (FormatException) are logged and counted as unused; a wrong number of fields aborts the import.
+	 * 
+	 * idFilter may be null, then every gene line is imported.
+	 * schemaError is the message used if the first line fits neither format.
+	 * The reader is closed on every way out of this method.
+	 */
+	private void importAnnotations(String filename, Set<String> idFilter, String schemaError) throws ExitException{
+		Info info = new Info("read annotations");
+		int lineCount = 0;
+		int unused = 0;
+
+		BufferedReader annoBuffer = null;
+		try {
+			annoBuffer = new BufferedReader(new FileReader(new File(filename)));
+
+			String line = annoBuffer.readLine();
+			if (line == null){
+				String message = "error: couldn't read file (" + new File(filename).getAbsolutePath() + ")";
+				throw new ExitException(message);
+			}
+
+			// check the header line (plain comparison, the headers contain no regex syntax)
+			ucsc = line.equals(ucscHeader);
+			ensembl = line.equals(ensembleHeader);
+
+			// file line number of the first gene line, used for error messages
+			int firstDataLine;
+			if (ucsc || ensembl){
+				firstDataLine = 2;
+				line = annoBuffer.readLine();
+			}
+			else{
+				// if not a expected header, check if data format fits; the line is then handled as gene line
+				String[] firstFields = line.split("\t");
+				ucsc = Gene.isUcscLine(firstFields);
+				ensembl = Gene.isEnsemblLine(firstFields);
+				if (!(ucsc || ensembl))
+					throw new ExitException(schemaError);
+				firstDataLine = 1;
+			}
+
+			for (; line != null; line = annoBuffer.readLine()){
+				String[] fields = line.split("\t");
+
+				// check line for correct number of fields
+				if ((ucsc && fields.length != 12) || (ensembl && fields.length != 16))
+					throw new ExitException("error: couldn't parse annotations (wrong # of fields in line " + (lineCount+firstDataLine) + ")");
+
+				if (passesFilter(fields, idFilter)){
+					try {
+						genes.add(new Gene(line, ucsc ? Gene.UCSC : Gene.ENSEMBL));
+					} 
+					catch (FormatException e) {
+						Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", e.getLocalizedMessage());
+						unused++;
+					}
+				}
+				lineCount++;
+
+				Configurator.checkBreak();
+			}
+		} 
+		catch (FileNotFoundException e) {
+			throw new ExitException(e.getMessage());
+		} 
+		catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		}
+		finally {
+			if (annoBuffer != null){
+				try {
+					annoBuffer.close();
+				}
+				catch (IOException ignored) {
+					// nothing left to do for a reader that was only read from
+				}
+			}
+		}
+
+		info.stop(genes.size(), lineCount);
+		if (unused > 0){
+			if (Gene.isStrict()){
+				System.out.println("(unused genes: " + unused + " (non-protein-coding, uncommon chroms or incorrect/unregocnized annotations))");
+			}
+			else{
+				System.out.println("(unused genes: " + unused + " (incorrect/unregocnized annotations))");
+			}
+		}
+	}
+
+	/*
+	 * Checks if a gene line (already split into fields) codes for a protein in the filter.
+	 * UCSC lines are checked by their proteinID (field 10) up to the first "-",
+	 * Ensembl lines by their name (field 1). Without a filter (null) every line passes.
+	 */
+	private boolean passesFilter(String[] fields, Set<String> idFilter){
+		if (idFilter == null)
+			return true;
+
+		if (ucsc){
+			String id = fields[10];
+			int dash = id.indexOf('-');
+			if (dash >= 0)
+				id = id.substring(0, dash);
+			return idFilter.contains(id);
+		}
+
+		return idFilter.contains(fields[1]);
+	}
+
+	/**
+	 * <p>Prints the annotations of all genes in the set on the screen.</p>
+	 * <p>This method is recommended for small sets only, as it would produce a lot of screen output for larger sets.
+	 * For large sets use {@link #printAnnotations(int number)} instead to reduce the number of printed annotations.</p>
+	 */
+	public void printAnnotations(){
+		printAnnotations(genes.size());
+	}
+
+	/**
+	 * <p>Prints the annotations of a chosen number of genes on the screen, starting with the first gene in the set.</p>
+	 * 
+	 * <p>The printed header line is the Ensembl header for Ensembl sets and the UCSC header otherwise.</p>
+	 * 
+	 * @param number Number of genes to be printed; values outside of [0, size()] are clamped to this range.
+	 */
+	public void printAnnotations(int number){
+		number = Math.max(0, Math.min(number, genes.size()));
+		System.out.println("GeneAnnotationSet contains " + genes.size() + " gene annotations");
+		System.out.println("Printing " + number + " of " + genes.size() + " genes:");
+		// the header has to fit the columns of the genes printed below
+		System.out.println(ensembl ? ensembleHeader : ucscHeader);
+		for (int i=0; i<number; i++){
+			System.out.println(genes.get(i).toString());
+		}
+	}
+
+	/**
+	 * <p>Reads the nucleotide sequences for all genes from chromosome fasta files.</p>
+	 * 
+	 * <p>The chromosome fasta files must be named like the chromosomes in the gene annotations plus the .fa extension
+	 * (e.g. "chr1.fa", "chr2.fa", "chrY.fa" and so on, or maybe "chrI.fa", "chrII.fa", "chrIV.fa" etc. depending on the annotation).</p>
+	 * 
+	 * <p>They must be all located in the same folder, which is indicated as input parameter.</p>
+	 * 
+	 * <p>The first line in the fasta files must start with ">" followed by the chromosomes name (e.g. ">chr1" or ">chrVI").
+	 * The method assumes 50 (!) nucleotides per line, lines with other lengths will cause wrong position calculations and therefore wrong sequences.</p>
+	 * 
+	 * @param chrompath Folder containing all chromosomes needed for the genes
+	 * @throws ExitException 
+	 */
+	public void readNaSequences(String chrompath) throws ExitException{
+		Info info = new Info("read gene sequences");
+		int count = 0;
+
+		/* A hashmap assigns a vector of Genes (or actually there indices) to the chromosomes.
+		 * The genes in a vector are then sorted by the txStart and txEnd, so running once over the chromosome is enough to read all needed sequences.
+		 * Overlaps are also considered. Chromosome parts without needed sequences are skipped.
+		 */
+		
+		HashMap<String,Vector<Integer>> chroms = getCommonChroms();
+		HashMap<Integer,StringBuffer> tmpSeqs = new HashMap<Integer,StringBuffer>();
+		TreeMap<Integer,Vector<Integer>> txStarts = new TreeMap<Integer,Vector<Integer>>();
+		TreeMap<Integer,Vector<Integer>> txEnds = new TreeMap<Integer,Vector<Integer>>();
+		Vector<Integer> geneidx;
+		String chrom;
+
+		int skip, start, end;
+		int skipped = 0;
+		int key;
+		int idx;
+
+		Entry<Integer,Vector<Integer>> txs, txe;
+		String line;
+
+		for (Entry<String, Vector<Integer>> entry : chroms.entrySet()){
+			geneidx = entry.getValue();
+			chrom = entry.getKey();
+			skipped = 0;
+
+			// build sorted lists with txStarts and txEnds with connections to their related genes
+			for (int i=0; i<geneidx.size(); i++){
+				key = genes.get(geneidx.get(i)).getTxStart();
+				if (!txStarts.containsKey(key)) 
+					txStarts.put(key, new Vector<Integer>());
+				txStarts.get(key).add(geneidx.get(i));
+
+				key = genes.get(geneidx.get(i)).getTxEnd();
+				if (!txEnds.containsKey(key)) 
+					txEnds.put(key, new Vector<Integer>());
+				txEnds.get(key).add(geneidx.get(i));
+			}
+
+			try {
+				// check path
+				if (!chrompath.endsWith("\\") && !chrompath.endsWith("/"))
+					chrompath = chrompath + "/";
+				BufferedReader chromBuffer = new BufferedReader(new FileReader(new File(chrompath + chrom + ".fa")));
+
+				// check file
+				if (chromBuffer.ready()){
+					Pattern p = Pattern.compile(">" + chrom);
+					if (!chromBuffer.readLine().matches(p.pattern())){	
+						continue;	
+					}
+				}
+
+				while (!txEnds.isEmpty() && chromBuffer.ready()){
+
+					// skip chromosome parts towards next txStart
+					if (tmpSeqs.isEmpty()){
+						txs = txStarts.firstEntry();
+						skip = (int) Math.ceil((txs.getKey()+1) / 50d) - 1 - skipped;
+						for (int i=0; i<txs.getValue().size(); i++)
+							tmpSeqs.put(txs.getValue().get(i), new StringBuffer());
+						chromBuffer.skip(skip*50+skip);
+						skipped += skip;
+						txStarts.remove(txs.getKey());
+					}
+
+					// take next txStarts resp. add gene indices to a sequence buffer, if they are in the next line
+					while (!txStarts.isEmpty() && (int) Math.ceil((txStarts.firstKey()+1) / 50d) - 1 == skipped){
+						txs = txStarts.firstEntry();
+						for (int i=0; i<txs.getValue().size(); i++)
+							tmpSeqs.put(txs.getValue().get(i), new StringBuffer());
+						txStarts.remove(txs.getKey());
+					}
+
+					// read next line and add it to all entries in the sequence buffer
+					line = chromBuffer.readLine();
+					for (Entry<Integer,StringBuffer> seq : tmpSeqs.entrySet()){
+						seq.getValue().append(line);
+					}
+
+					// if a gene ends in the current line, finish its sequence with cutting head and tail up to txStart resp. from txEnd
+					while (!txEnds.isEmpty() && (int) Math.ceil((txEnds.firstKey()+1) / 50d) - 1 == skipped){
+						txe = txEnds.firstEntry();
+						for (int i=0; i<txe.getValue().size(); i++){
+							idx = txe.getValue().get(i);
+							start = genes.get(idx).getTxStart() % 50;
+							end = genes.get(idx).getTxEnd() - genes.get(idx).getTxStart() + start;
+							genes.get(idx).setSequence(tmpSeqs.remove(idx).toString().substring(start, end));
+							count++;
+						}
+						txEnds.remove(txe.getKey());
+					}
+
+					skipped++;
+
+					Configurator.checkBreak();
+				}
+
+				chromBuffer.close();
+			} catch (FileNotFoundException e) {
+				throw new ExitException(e.getMessage());
+			} catch (IOException e) {
+				throw new ExitException(e.getMessage());
+			}
+
+		}
+
+		info.stop(count, genes.size());
+	}
+
+	/**
+	 * Deletes the nucleotide sequence of every gene.
+	 */
+	public void deleteNaSequences(){
+		for (Gene gene : genes){
+			gene.setSequence(null);
+		}
+		System.gc();
+	}
+
+	/**
+	 * <p>This function calculates the proteins the genes have in common.</p>
+	 * 
+	 * <p>It will return a HashMap with protein identifiers as keys and as value a Vector 
+	 * containing the indices of genes (in this GeneSet) which code for this protein.</p>
+	 *  
+	 * @return a HashMap with proteins as keys and Vectors of gene indices as values
+	 */
+	public HashMap<String, Vector<Integer>> getCommonProts(){
+		HashMap<String,Vector<Integer>> commonProts = new HashMap<String,Vector<Integer>>();
+		String id;
+
+		for (int i=0; i<genes.size(); i++){
+			id = genes.get(i).getProteinReference();
+			if (!commonProts.containsKey(id)){
+				commonProts.put(id, new Vector<Integer>());
+			}
+			commonProts.get(id).add(i);
+		}
+
+		return commonProts;
+	}
+
+	/**
+	 * <p>This function calculates the chromosomes the genes have in common.</p>
+	 * 
+	 * <p>It will return a HashMap with chromosome names as keys and as value a Vector 
+	 * containing the indices of genes (in this GeneSet) which lie on this chromosome.</p>
+	 *  
+	 * @return a HashMap with chromosomes as keys and Vectors of gene indices as values
+	 */
+	public HashMap<String, Vector<Integer>> getCommonChroms(){
+		HashMap<String,Vector<Integer>> commonChroms = new HashMap<String,Vector<Integer>>();
+		String id;
+
+		for (int i=0; i<genes.size(); i++){
+			id = genes.get(i).getChrom();
+			if (!commonChroms.containsKey(id)){
+				commonChroms.put(id, new Vector<Integer>());
+			}
+			commonChroms.get(id).add(i);
+		}
+
+		return commonChroms;
+	}
+
+	/**
+	 * <p>This functions reads the aminoacid sequences related to the genes as they can be obtained
+	 * from the UCSC table browser (either with track "UCSC Genes" and table "knownGenePep" or the track "Ensembl Genes" with table "ensPep").</p>
+	 * 
+	 * <p>The file must be tab-separated with the gene name in the first and the sequence in the second column.
+	 * A header line ("#name	seq") is not required, because ucsc files from the ftp server come without one;
+	 * lines whose first column is not the name of a gene in this GeneSet are ignored, as are lines without a second column.</p>
+	 * 
+	 * <p>Afterwards all genes still having no aminoacid sequence are removed from this GeneSet.</p>
+	 * 
+	 * @param filename Name of the aminoacid sequence file
+	 * @throws ExitException if the file is missing or can not be read
+	 */
+	public void readAaSequences(String filename) throws ExitException{
+		Info info = new Info("read aa sequences");
+		int count = 0;
+		int lines = 0;
+
+		// create a map with <gene name, gene index>
+		// NOTE(review): if several genes share a name only the last one is kept here, the others
+		// get no sequence and are removed below - confirm that gene names are unique
+		HashMap<String,Integer> genemap = new HashMap<String,Integer>();
+		for (int g=0; g<genes.size(); g++){
+			genemap.put(genes.get(g).getName(),g);
+		}
+
+		BufferedReader aaBuffer = null;
+		try {
+			aaBuffer = new BufferedReader(new FileReader(new File(filename)));
+
+			String row;
+			while ((row = aaBuffer.readLine()) != null){
+				lines++;
+				String[] line = row.split("\t");
+
+				// if the genemap contains the gene name in this line, then the aa sequences is added to the corresponding gene
+				// (lines without a sequence column, e.g. blank lines, have nothing to add)
+				if (line.length > 1 && genemap.containsKey(line[0])){
+					genes.get(genemap.get(line[0])).setAaSequence(line[1]);
+					count++;
+				}
+
+				Configurator.checkBreak();
+			}
+		} catch (FileNotFoundException e) {
+			throw new ExitException(e.getMessage());
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		} finally {
+			if (aaBuffer != null){
+				try {
+					aaBuffer.close();
+				} catch (IOException ignored) {
+					// nothing left to do for a reader that was only read from
+				}
+			}
+		}
+
+		info.stop(count, lines);
+		removeAaSeqless();
+	}
+
+	/**
+	 * Deletes the aminoacid sequence of every gene.
+	 */
+	public void deleteAaSequences(){
+		for (Gene gene : genes){
+			gene.setAaSequence(null);
+		}
+		System.gc();
+	}
+
+	/* removes all genes without a aminoacid sequence
+	 * e.g. used in readAaSequences after loading the sequences, because genes without are useless in the mapping steps
+	 */
+	private void removeAaSeqless(){
+		Gene gene;
+		for (Iterator<Gene> it = genes.iterator(); it.hasNext();){
+			gene = it.next();
+			if (gene.getAaSequence() == null){
+				it.remove();
+			}
+		}
+		System.gc();
+	}
+
+	/**
+	 * <p>Deletes every Gene which is not marked as used (see {@link Gene#setUsed(boolean)}).</p>
+	 * 
+	 * <p>This function might be used in front of memory expensive operations like {@link #readNaSequences(String)}.</p>
+	 */
+	public void removeUnused(){
+		for (Iterator<Gene> it = genes.iterator(); it.hasNext();){
+			if (!it.next().isUsed()){
+				it.remove();
+			}
+		}
+		System.gc();
+	}
+
+	/**
+	 * Gives a vector with randomly chosen Gene objects.
+	 * 
+	 * <p>Note: the genes of this GeneSet are shuffled in place for this, so gene indices obtained before
+	 * (e.g. from {@link #getCommonProts()} or {@link #getCommonChroms()}) do not fit anymore afterwards.
+	 * The returned Vector itself is a new one.</p>
+	 * 
+	 * @param number The number of required Gene objects (at most all genes of the set are returned).
+	 * @return Vector of Gene objects.
+	 */
+	public Vector<Gene> getRandomSubset(int number){
+		// shuffles the set's own vector, the first genes of the new order form the subset
+		Collections.shuffle(genes);	
+		return new Vector<Gene>(genes.subList(0, Math.min(number, genes.size())));
+	}	
+
+	public Vector<Gene> getSubset(int start, int end){
+		Vector<Gene> subset = new Vector<Gene>();
+
+		end = Math.min(end, genes.size());
+		for(int i=start; i<end; i++){
+			subset.add(genes.get(i));
+		}
+
+		return subset;
+	}	
+
+	/**
+	 * Returns a String with the header information like in the input file (either ucsc or ensembl genes).
+	 * 
+	 * @return  the header as a String
+	 */
+	public String getHeader(){
+		if (ucsc)
+			return ucscHeader;
+		else // ensembl
+			return ensembleHeader;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/data/MzIdentML.java b/src/de/rki/ng4/ipig/data/MzIdentML.java
new file mode 100644
index 0000000..19c66fe
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/MzIdentML.java
@@ -0,0 +1,327 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.Namespace;
+import org.jdom.input.SAXBuilder;
+
+public class MzIdentML {
+	
+	private final String xsd1 = "mzIdentML1.0.0.xsd";
+	private final String xsd2 = "FuGElightv1.0.0.xsd";
+	private final String xsdPath = "/xsd/";
+	
+	/**
+	 * <p>Reads an mzIdentML file and parses the peptide identifications.</p>
+	 * 
+	 * <p>The output is a Vector with Strings each representing an identified peptide in the same format as expected from the txt file:<br>
+	 * "prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos"</p>
+	 * 
+	 * <p>For the validation the xsd files are extracted into the current working directory
+	 * and deleted again afterwards, no matter whether loading the file succeeded.</p>
+	 * 
+	 * @param filename Path of the mzIdentML file
+	 * @return a Vector with a String for each peptide identification,
+	 * or null if a JDOMException or IOException occurred while loading (its stack trace is printed)
+	 */
+	public Vector<String> load(String filename){
+		Vector<String> peptides = null;
+
+		try {
+			Document doc;
+			try {
+				// extract xsds from jar-resources
+				xsdExtract();
+
+				// load and validate the mzIdentML
+				SAXBuilder builder =
+						new SAXBuilder("org.apache.xerces.parsers.SAXParser", true);
+				builder.setFeature(
+						"http://apache.org/xml/features/validation/schema", true);
+				// the schema location is passed as URI, so blanks in the path can not
+				// break the whitespace separated "namespace location" pair
+				builder.setProperty(
+						"http://apache.org/xml/properties/schema/external-schemaLocation",
+						"http://psidev.info/psi/pi/mzIdentML/1.0 " + new File(xsd1).toURI().toString());
+				doc = builder.build(filename);
+			}
+			finally {
+				// delete the xsds, also if loading or validation failed
+				xsdDelete();
+			}
+
+			// start navigating through xml structure
+			Element root = doc.getRootElement();
+			Namespace ns = root.getNamespace();
+
+			// get proteins and peptides
+			Element seqs = root.getChild("SequenceCollection", ns);
+			HashMap<String,Protein> prots = getProts(seqs, ns);
+			HashMap<String,Peptide> peps = getPeps(seqs, ns);
+
+			// get results
+			Element idents = root.getChild("DataCollection", ns).getChild("AnalysisData", ns).getChild("SpectrumIdentificationList", ns);
+			Vector<Result> res = getResults(idents, ns);
+
+			// combine them
+			peptides = combine(res, peps, prots);
+
+		} catch (JDOMException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+
+		return peptides;
+	}
+	
+	// Extracts both xsd files from the jar-resources into external files,
+	// because the SAXBuilder needs a fully usable path as String for the xml validation (so a resource stream can't be used).
+	// Returns the absolute path of the extracted mzIdentML xsd.
+	private String xsdExtract(){
+		File mzIdentSchema = new File(xsd1);
+		xsdCopy(xsdPath + xsd1, mzIdentSchema);
+		xsdCopy(xsdPath + xsd2, new File(xsd2));
+		return mzIdentSchema.getAbsolutePath();
+	}
+	
+	// The xsd resource is copied line by line via a resource stream to an external file.
+	// Copying is best effort: problems are reported on System.err resp. as stack trace instead of being thrown.
+	private void xsdCopy(String xsdRes, File xsdFile){
+		InputStream is = getClass().getResourceAsStream(xsdRes);
+		if (is == null){
+			// getResourceAsStream gives null if the resource can not be found
+			System.err.println("xsd resource not found: " + xsdRes);
+			return;
+		}
+
+		BufferedReader br = new BufferedReader(new InputStreamReader(is));
+		BufferedWriter bw = null;
+		try {
+			bw = new BufferedWriter(new FileWriter(xsdFile));
+
+			// read until readLine() signals the end of the stream; ready() only tells
+			// whether a read would block and is no reliable end-of-stream test
+			String line;
+			while ((line = br.readLine()) != null){
+				bw.write(line);
+				bw.newLine();
+			}
+		}
+		catch (IOException e) {
+			e.printStackTrace();
+		}
+		finally {
+			// close both streams, also if the copy failed half way
+			try {
+				br.close();
+			}
+			catch (IOException e) {
+				e.printStackTrace();
+			}
+			if (bw != null){
+				try {
+					bw.close();
+				}
+				catch (IOException e) {
+					e.printStackTrace();
+				}
+			}
+		}
+	}
+	
+	// removes the extracted xsd files again, they can be deleted after the xml validation
+	private void xsdDelete(){
+		for (String xsdName : new String[]{xsd1, xsd2}){
+			new File(xsdName).delete();
+		}
+	}
+
+	// combines the results with the peptides and proteins to a string per peptide like needed as input for a Peptide-Object
+	// prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos
+	// One row is built per result and referenced protein. References which can not be resolved
+	// (unknown Peptide_ref or DBSequence_Ref) are reported on System.err and skipped.
+	private Vector<String> combine(Vector<Result> res, HashMap<String,Peptide> peps, HashMap<String,Protein> prots){
+		Vector<String> peptideRows = new Vector<String>();
+		String sep = "\t";
+
+		for (Result r : res){
+			Peptide pep = peps.get(r.Peptide_ref);
+			if (pep == null){
+				System.err.println("mzIdentML: unknown Peptide_ref \"" + r.Peptide_ref + "\", result skipped");
+				continue;
+			}
+			for (String evi : r.DBSequence_Ref){
+				// look the protein up once instead of once per column
+				Protein prot = prots.get(evi);
+				if (prot == null){
+					System.err.println("mzIdentML: unknown DBSequence_Ref \"" + evi + "\", peptide evidence skipped");
+					continue;
+				}
+				String row = prot.accession + sep + prot.proteinDescription + sep + query(r.query) + sep 
+						+ unique(r.unique) + sep + r.chargeState + sep + r.score + sep + pep.sequence + sep
+						+ mod(pep.sequence, pep.modLocation, pep.modName); 
+				peptideRows.add(row);
+			}
+		}
+
+		return peptideRows;
+	}
+
+	// extracts the last run of digits in query (originally SpectrumIdentificationResult's "id");
+	// a query without any digit is returned unchanged
+	private String query(String query){
+		String lastNumber = query;
+		for (Matcher digits = Pattern.compile( "\\d+" ).matcher( query ); digits.find(); ){
+			lastNumber = digits.group();
+		}
+		return lastNumber;
+	}
+
+	// returns the string representation for the unique value: "1" if unique, "0" else
+	private String unique(boolean unique){
+		return unique ? "1" : "0";
+	}
+
+	// builds a string representation of a peptide's modifications similar to mascot csv exports: "<names>\t<positions>"
+	// - names: comma separated modification names, with a leading count for names occurring more than once
+	//   (modifications without a name are left out)
+	// - positions: one "0" per location (index 0, each residue of seq, index length+1), "[n]" at modified locations
+	//   with n being the UNIMOD number ("X" if the name contains none), and a "." behind the first and in front of the last location
+	// Locations which are no number or outside of [0, seq.length()+1] are ignored.
+	// If no modification is left, only "\t" is returned.
+	private String mod(String seq, Vector<String> locations, Vector<String> names){
+		// one token per location; tokens are only joined at the end, so neither the order of the
+		// locations nor the length of already inserted "[n]" tokens can shift later positions
+		String[] tokens = new String[seq.length()+2];
+		Arrays.fill(tokens, "0");
+		HashMap<String,Integer> modCount = new HashMap<String,Integer>();
+		Pattern unimod = Pattern.compile( "UNIMOD:\\d+" );
+
+		boolean mod = false;
+		for (int i=0; i<locations.size(); i++){
+			int pos;
+			try{
+				pos = Integer.parseInt(locations.get(i));
+			}
+			catch (NumberFormatException e){
+				// location is missing or not a number, this modification can not be placed
+				continue;
+			}
+			if (pos < 0 || pos >= tokens.length)
+				continue;
+
+			String name = names.get(i);
+			String num = "X";
+			Matcher matcher = unimod.matcher( name );
+			while ( matcher.find() ) 
+				num = matcher.group().split(":")[1];
+
+			// several modifications at the same location are listed one after another
+			String token = "[" + num + "]";
+			tokens[pos] = tokens[pos].equals("0") ? token : tokens[pos] + token;
+			mod = true;
+
+			Integer seen = modCount.get(name);
+			modCount.put(name, seen == null ? 1 : seen + 1);
+		}
+
+		if (!mod)
+			return "\t";
+
+		// the separators are set between whole tokens, so they can not end up inside of a "[n]"
+		StringBuffer posString = new StringBuffer();
+		for (int i=0; i<tokens.length; i++){
+			posString.append(tokens[i]);
+			if (i == 0)
+				posString.append(".");
+			if (i == tokens.length-2)
+				posString.append(".");
+		}
+
+		// may stay empty if none of the modifications has a name
+		StringBuffer nameString = new StringBuffer();
+		for (Entry<String,Integer> e : modCount.entrySet()){
+			if (e.getKey().length() == 0)
+				continue;
+			if (nameString.length() > 0)
+				nameString.append(", ");
+			if (e.getValue()>1)
+				nameString.append(e.getValue() + " ");
+			nameString.append(e.getKey());
+		}
+
+		return nameString.toString() + "\t" + posString.toString();
+	}
+
+	// parses all proteins (DBSequence elements) and maps them by their id
+	private HashMap<String,Protein> getProts(Element seqs, Namespace ns){
+		HashMap<String,Protein> proteins = new HashMap<String,Protein>();
+		@SuppressWarnings("unchecked")
+		List<Element> dbSequences = seqs.getChildren("DBSequence", ns);
+		for (Element dbSequence : dbSequences){
+			Protein protein = new Protein();
+			protein.accession = dbSequence.getAttributeValue("accession");
+			// stays empty if there is no "protein description" cvParam
+			protein.proteinDescription = "";
+			@SuppressWarnings("unchecked")
+			List<Element> cvParams = dbSequence.getChildren("cvParam", ns);
+			for (Element cvParam : cvParams){
+				String paramName = cvParam.getAttributeValue("name");
+				if (paramName.matches("protein description")){
+					protein.proteinDescription = cvParam.getAttributeValue("value");
+				}
+			}
+			proteins.put(dbSequence.getAttributeValue("id"), protein);
+		}
+		return proteins;
+	}
+
+	// parses all peptides (Peptide elements) with their modifications and maps them by their id
+	private HashMap<String,Peptide> getPeps(Element seqs, Namespace ns){
+		HashMap<String,Peptide> peptides = new HashMap<String,Peptide>();
+		@SuppressWarnings("unchecked")
+		List<Element> pepElements = seqs.getChildren("Peptide", ns);
+		for (Element pepElement : pepElements){
+			Peptide peptide = new Peptide();
+			peptide.sequence = pepElement.getChild("peptideSequence", ns).getValue();
+			peptide.modLocation = new Vector<String>();
+			peptide.modName = new Vector<String>();
+			@SuppressWarnings("unchecked")
+			List<Element> modElements = pepElement.getChildren("Modification", ns);
+			for (Element modElement : modElements){
+				// the name stays empty without an UNIMOD cvParam, with several of them the last one is taken
+				String modName = "";
+				@SuppressWarnings("unchecked")
+				List<Element> cvParams = modElement.getChildren("cvParam", ns);
+				for (Element cvParam : cvParams){
+					if (cvParam.getAttributeValue("accession").startsWith("UNIMOD")){
+						modName = cvParam.getAttributeValue("name") + " (" + cvParam.getAttributeValue("accession") + ")";
+					}
+				}
+				// location and name are kept at the same index of both vectors
+				peptide.modLocation.add(modElement.getAttributeValue("location"));
+				peptide.modName.add(modName);
+			}
+			peptides.put(pepElement.getAttributeValue("id"), peptide);
+		}
+		return peptides;
+	}
+
+	// parses the results resp. each SpectrumIdentificationItem containing at least one PeptideEvidence
+	private Vector<Result> getResults(Element idents, Namespace ns){
+		Vector<Result> results = new Vector<Result>();
+		@SuppressWarnings("unchecked")
+		List<Element> spectrumResults = idents.getChildren("SpectrumIdentificationResult", ns);
+		for (Element spectrumResult : spectrumResults){
+			@SuppressWarnings("unchecked")
+			List<Element> items = spectrumResult.getChildren("SpectrumIdentificationItem", ns);
+			for (Element item : items){
+				@SuppressWarnings("unchecked")
+				List<Element> evidences = item.getChildren("PeptideEvidence", ns);
+				if (evidences.isEmpty()){
+					// items without evidence can not be linked to a protein
+					continue;
+				}
+
+				Result result = new Result();
+				result.query = spectrumResult.getAttributeValue("id");
+				result.chargeState = item.getAttributeValue("chargeState");
+				result.Peptide_ref = item.getAttributeValue("Peptide_ref");
+				result.DBSequence_Ref = new Vector<String>();
+				for (Element evidence : evidences){
+					result.DBSequence_Ref.add(evidence.getAttributeValue("DBSequence_Ref"));
+				}
+
+				// defaults, used if the item has no matching cvParam
+				result.score = "0";
+				result.unique = true;
+				@SuppressWarnings("unchecked")
+				List<Element> cvParams = item.getChildren("cvParam", ns);
+				for (Element cvParam : cvParams){
+					String paramName = cvParam.getAttributeValue("name");
+					if (paramName.matches("mascot:score")){
+						result.score = cvParam.getAttributeValue("value");
+					}
+					if (paramName.matches("peptide shared in multiple proteins")){
+						result.unique = false;
+					}
+				}
+				results.add(result);
+			}
+		}
+		return results;
+	}
+
+	// class for collecting the protein informations temporarily
+	// (static, as it needs no reference to the enclosing MzIdentML instance)
+	private static class Protein{
+		String accession;
+		String proteinDescription;
+	}
+
+	// class for collecting the peptide informations temporarily
+	// (static, as it needs no reference to the enclosing MzIdentML instance)
+	private static class Peptide{
+		String sequence;
+		// location and name of the i-th modification are kept at index i of both vectors
+		Vector<String> modLocation;
+		Vector<String> modName;
+
+	}
+	
+	// class for collecting the result informations temporarily
+	// (static, as it needs no reference to the enclosing MzIdentML instance)
+	private static class Result{
+		String query;
+		String chargeState;
+		String Peptide_ref;
+		Vector<String> DBSequence_Ref;
+		boolean unique;
+		String score;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/data/Peptide.java b/src/de/rki/ng4/ipig/data/Peptide.java
new file mode 100644
index 0000000..2e4b0c1
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/Peptide.java
@@ -0,0 +1,431 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import java.util.TreeSet;
+import java.util.Vector;
+import java.util.regex.Pattern;
+
+import de.rki.ng4.ipig.exceptions.FormatException;
+import de.rki.ng4.ipig.mapping.Position;
+
+/**
+ * <p>Gene is a container for diverse informations of a gene, as they are provided by the USCS table browser.</p>
+ * 
+ * <p>The import of USCS annotations should be done with a {@link GeneSet}, which provides reading methods 
+ * and creates an Gene object for any gene (resp. line) in an imported annotation file.</p>
+ * 
+ * <p>Please note: Coordinates (resp. positions) provided by the UCSC are zero-based and half-open.
+ * This means gene positions are indexed like for example Java Arrays from 0 to length-1.</p>
+ * 
+ * <p>The member variables correspond to the USCS table schema for the track "UCSC Genes" and table "knownGene"
+ * or the track "Ensembl Genes" with table "ensGene"
+ * with e.g. clade "Mammal", genome "Human" and group "Genes and Gene Prediction Tracks" (
+ * <a href="http://genome.ucsc.edu/cgi-bin/hgTables?org=Human&db=hg19&hgsid=205072007&hgta_doMainPage=1">
+ * USCS Table Browser</a>).</p>
+ *
+ * @author Mathias Kuhring
+ */
+
+/**
+ * <p>A Peptide is a container for diverse informations of a peptide from the input to be mapped.</p>
+ * 
+ * <p>The import of peptides should be done with a {@link PeptideSet}, as it provides reading methods for txt and mzIdentML files.
+ * It will create an Peptide object for any imported peptide.</p>
+ * 
+ * 
+ * 
+ * @author Mathias Kuhring
+ */
+public class Peptide {
+	// data and annotation from the ms-csv input, as far as needed (prot_acc corresponds to UniProtKB-ID/EntryName or IPI)
+	// prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos
+	private String prot_acc;
+	private String prot_desc;
+	private String pep_query;
+	private String pep_isunique;
+	private String pep_exp_z;
+	private String pep_score;
+	private String pep_seq;
+	private String pep_var_mod;
+	private String pep_var_mod_pos1;
+	
+	private Vector<String> rest;
+
+	// the input row as string, used to simplify outputs
+	private String row;
+
+	// different protein ids, imported from uniprot idmapping file (UniProtKB-AC and RefSeq)
+	private String uniqueIdentifier;
+	private Vector<String> refSeq;
+	private Vector<String> ensemble_TRS;
+
+	private String geneSymbol;
+
+	// positions to be calculated by the mapping
+	private TreeSet<Position> positions;
+
+	private Vector<String> geneNames;
+
+	// some checkback values, if the peptide got protein ids, linked to a gene and mapped to this gene
+	private boolean proteinAssigned;
+	private boolean geneAssigned;
+	private boolean geneMapped;
+
+	private String method;
+	//	private boolean validated;
+
+	/**
+	 * <p>Peptide is initiated by giving a String with following tab separated values:</p>
+	 * 
+	 * <p>prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos</p>
+	 * 
+	 * @param row One row of a peptide file describing one peptide
+	 * @throws FormatException
+	 */
+	public Peptide(String row) throws FormatException{
+		this.row = row.replace("\"", "");
+		rest = new Vector<String>();
+		refSeq = new Vector<String>();
+		ensemble_TRS = new Vector<String>();
+		geneNames = new Vector<String>();
+		positions = new TreeSet<Position>(new Position.PositionComparator());
+		parseCsvRow();
+	}
+
+	/* Expected layout of a peptide row:
+	 * prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos
+	 * ROA2_HUMAN	Heterogeneous nuclear ribonucleoproteins A2/B1 OS=Homo sapiens GN=HNRNPA2B1 PE=1 SV=2	252	1	2	140.52	NMGGPYGGGNYGPGGSGGSGGYGGR	Deamidated (NQ)	0.1000000000000000000000000.0
+	 * CPSM_HUMAN	Carbamoyl-phosphate synthase [ammonia], mitochondrial OS=Homo sapiens GN=CPS1 PE=1 SV=2	99	1	2	5.51	ASRSFPFVSK	
+	 * The pattern is compiled once for all peptides instead of once per row.
+	 */
+	private static final Pattern ROW_PATTERN = 
+			Pattern.compile("[\\w:\\.\\|]*\t.*\t\\d*\t\\d\t\\d\t(\\d|\\.)+\t[A-Z]+.*");
+
+	/*
+	 * Parses the imported row by checking its format and splitting it at the tabs.
+	 * The first seven columns are required, pep_var_mod and pep_var_mod_pos are optional (empty if missing)
+	 * and all further columns are collected in "rest".
+	 * Throws a FormatException if the row does not match the expected layout.
+	 */
+	private void parseCsvRow() throws FormatException{
+		if (!ROW_PATTERN.matcher(row).matches()){	
+			throw new FormatException("peptide row incorrect");	
+		}
+
+		String[] splits = row.split("\t");
+
+		prot_acc = splits[0];
+		prot_desc = splits[1];
+		pep_query = splits[2];
+		pep_isunique = splits[3];
+		pep_exp_z = splits[4];
+		pep_score = splits[5];
+		pep_seq = splits[6];
+		// trailing empty columns are dropped by split, so the optional columns may be missing
+		pep_var_mod = splits.length>7 ? splits[7] : "";
+		pep_var_mod_pos1 = splits.length>8 ? splits[8] : "";
+		for (int i=9; i<splits.length; i++){
+			rest.add(splits[i]);
+		}
+	}
+	
+	/**
+	 * <p>Extracts a gene name from the protein description.
+	 * This is a public method as it has to be done from "outside" due to some input file properties.</p>
+	 * 
+	 * <p>For further details see source (!) of PeptideSet#readTxt(String).</p>
+	 */
+	public void extrGS(){
+		// "GN=" is evaluated last, so it wins if both labels occur in the description
+		for (String label : new String[]{"Gene_Symbol=", "GN="}){
+			int labelStart = prot_desc.indexOf(label);
+			if (labelStart < 0){
+				continue;
+			}
+			int from = labelStart + label.length();
+			int blank = prot_desc.indexOf(" ", from);
+			// the symbol ends at the next blank or, without one, at the end of the description
+			geneSymbol = (blank > 0) ? prot_desc.substring(from, blank) : prot_desc.substring(from);
+		}
+	}
+
+	/**
+	 * Returns identifiers or references for the protein this peptide is part of.
+	 * 
+	 * @param ref Parameter to choose the protein identifier or identifier, "name" for the protein name (e.g. UniProtKB-ID), 
+	 * "id" for protein ids (UniProtKB-AC and RefSeq as references to UCSC knownGenes and Ensemble_TRS as reference to EnsenbleGenes)
+	 * @return Vector of identifiers
+	 */
+	public Vector<String> getProt(String ref){
+		boolean wantIds = ref.matches("id");
+		if (!wantIds && !ref.matches("name")){
+			// unknown kind of reference
+			return null;
+		}
+
+		// the protein accession is part of both answers
+		Vector<String> out = new Vector<String>();
+		out.add(prot_acc);
+		if (wantIds){
+			out.add(uniqueIdentifier);
+			out.addAll(refSeq);
+			out.addAll(ensemble_TRS);
+		}
+		return out;
+	}
+
+	/**
+	 * Returns the protein accession (e.g. UniProtKB-ID or IPI).
+	 * 
+	 * @return The protein accession (UniProtKB-ID or IPI)
+	 */
+	public String getProt_acc(){
+		return prot_acc;
+	}
+
+	/**
+	 * Sets the protein accession.
+	 * 
+	 * @param prot_acc protein accession
+	 */
+	public void setProt_acc(String prot_acc){
+		this.prot_acc = prot_acc;
+	}
+
+	/**
+	 * Returns the peptide's aminoacid sequence.
+	 * 
+	 * @return The peptide's aminoacid sequence
+	 */
+	public String getSequence(){
+		return pep_seq;
+	}
+
+	/**
+	 * Sets this Peptide as mapped to a gene.
+	 * 
+	 * @param mapped set true if Peptide could be mapped to a gene
+	 */
+	public void setGeneMapped(boolean mapped){
+		geneMapped = mapped;
+	}
+
+	/**
+	 * Returns if this Peptide could be mapped to gene.
+	 * 
+	 * @return true if Peptide could be mapped, false else
+	 */
+	public boolean isGeneMapped(){
+		return geneMapped;
+	}
+
+	/**
+	 * Sets this Peptide as assigned to a protein.
+	 * 
+	 * @param proteinAssigned set true if Peptide is assigned to a protein
+	 */
+	public void setProteinAssigned(boolean proteinAssigned){
+		this.proteinAssigned = proteinAssigned;
+	}
+
+	/**
+	 * Returns if this Peptide is assigned to a protein.
+	 * 
+	 * @return true if Peptide is assigned, false else
+	 */
+	public boolean isProteinAssigned(){
+		return proteinAssigned;
+	}
+
+	/**
+	 * Returns a String representation of the peptide in the same format as it was in the input.
+	 * 
+	 * @return Peptide's String representation
+	 */
+	public String getRow() {
+		return prot_acc + "\t"  + prot_desc + "\t" + pep_query + "\t" + pep_isunique + "\t" + pep_exp_z + "\t" + 
+				pep_score + "\t" + pep_seq + "\t" + pep_var_mod + "\t" + pep_var_mod_pos1;
+	}
+
+	/**
+	 * Returns the peptide's score.
+	 * @return peptide's score
+	 */
+	public String getPep_Score(){
+		return pep_score;
+	}
+
+	/**
+	 * Returns the peptide's charge.
+	 * @return peptide's charge
+	 */
+	public String getPep_exp_z(){
+		return pep_exp_z;
+	}
+
+	/**
+	 * Sets the unique identifier (UniProtKB-AC)
+	 * @param uniqueIdentifier usually the UniProtKB-AC
+	 */
+	public void setUniqueIdentifier(String uniqueIdentifier) {
+		this.uniqueIdentifier = uniqueIdentifier;
+	}
+
+	/**
+	 * Sets this Peptide as assigned to a gene.
+	 * 
+	 * @param geneAssigned set true if Peptide is assigned to a gene
+	 */
+	public void setGeneAssigned(boolean geneAssigned) {
+		this.geneAssigned = geneAssigned;
+	}
+	
+	/**
+	 * Returns if this Peptide is assigned to a gene.
+	 * 
+	 * @return true if Peptide is assigned, false else
+	 */
+	public boolean isGeneAssigned() {
+		return geneAssigned;
+	}
+
+	/**
+	 * Returns the protein description.
+	 * @return protein description
+	 */
+	public String getProt_desc() {
+		return prot_desc;
+	}
+
+	/**
+	 * Sets the protein description.
+	 * @param prot_desc protein description
+	 */
+	public void setProt_desc(String prot_desc) {
+		this.prot_desc = prot_desc;
+	}
+
+	/**
+	 * Returns the gene name or gene symbol.
+	 * @return the gene name or gene symbol
+	 */
+	public String getGN() {
+		return geneSymbol;
+	}
+
+	/**
+	 * Adds a RefSeq to a list.
+	 * @param string one RefSeq
+	 */
+	public void addRefSeq(String string) {
+		refSeq.add(string);
+	}	
+
+	/**
+	 * Returns if the peptide is uniquely assigned to one protein during identification.
+	 * @return "1" if unique, "0" else
+	 */
+	public String getPep_isunique(){
+		return pep_isunique;
+	}
+
+	/**
+	 * Sets the match method, if the peptide could be matched to a gene.
+	 * @param method method name
+	 */
+	public void setMatchMethod(String method) {
+		this.method = method;
+	}
+
+	/**
+	 * Returns the match method, if the peptide could be matched to a gene.
+	 * @return method name
+	 */
+	public String getMatchMethod(){
+		return method;
+	}
+
+	/**
+	 * Returns the peptide's identification query value.
+	 * @return query value
+	 */
+	public String getPep_query() {
+		return pep_query;
+	}
+
+	/**
+	 * <p>Adds a new {@link Position} to a sorted Set.</p> 
+	 * <p>Redundant Positions (same location; chromosome, strand, start and end positions) are rejected.</p>
+	 * 
+	 * @param pos  a new Position
+	 * @return true if Position could be added, false if rejected
+	 */
+	public boolean addPosition(Position pos){
+		return positions.add(pos);
+	}
+
+	/**
+	 * Returns the Set of Positions.
+	 * @return Set of Positions
+	 */
+	public TreeSet<Position> getPositions(){
+		return positions;
+	}
+
+	/**
+	 * Adds the gene the peptide could be assigned to.
+	 * 
+	 * @param name the gene's name
+	 */
+	public void addGene(String name) {
+		geneNames.add(name);
+	}
+
+	/**
+	 * Returns a Vector with all genes the peptide could be assigned to yet.
+	 * @return Vector with gene names
+	 */
+	public Vector<String> getGenes(){
+		return geneNames;
+	}
+
+	/**
+	 * Adds an ensemble transcript name (Ensembl_TRS)
+	 * @param ens Ensembl_TRS
+	 */
+	public void addEnsembl_TRS(String ens) {
+		ensemble_TRS.add(ens);
+	}
+
+	/**
+	 * Returns the peptide modifications.
+	 * @return the pep_var_mod
+	 */
+	public String getPep_var_mod() {
+		return pep_var_mod;
+	}
+	
+	/**
+	 * Returns a string representation of the peptide modification positions.
+	 * @return the pep_var_mod_pos1
+	 */
+	public String getPep_var_mod_pos1() {
+		return pep_var_mod_pos1;
+	}
+}
+
+
diff --git a/src/de/rki/ng4/ipig/data/PeptideSet.java b/src/de/rki/ng4/ipig/data/PeptideSet.java
new file mode 100644
index 0000000..b96b022
--- /dev/null
+++ b/src/de/rki/ng4/ipig/data/PeptideSet.java
@@ -0,0 +1,717 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.data;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Vector;
+
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.exceptions.FormatException;
+import de.rki.ng4.ipig.mapping.Mapper;
+import de.rki.ng4.ipig.mapping.Position;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+
+/**
+ * PeptideSet handles the peptides that should be mapped to a genome.
+ * This includes import methods and creation of a Peptide object for each peptide imported from a file.
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class PeptideSet {
+
+	private Vector<Peptide> peptides;
+	private String header = "prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos";
+
+	/**
+	 * <p>Initializing a PeptideSet by giving a file (txt or mzid) of peptides identified from mass spectra.</p>
+	 * 
+	 * <p>A txt file must be tab separated and start with the following header:<br>
+	 * "prot_acc	prot_desc	pep_query	pep_isunique	pep_exp_z	pep_score	pep_seq	pep_var_mod	pep_var_mod_pos"</p>
+	 * 
+	 * <p>A mzid file must be a valid mzIdentML format file in version 1.0.0 
+	 * (<a href="http://www.psidev.info/index.php?q=node/403">Link</a>). 
+	 * It will be checked with a XML schema definition.</p>
+	 * 
+	 * @param filename
+	 * @throws ExitException
+	 */
+	public PeptideSet(String filename) throws ExitException{
+		peptides = new Vector<Peptide>();
+		readPeptides(filename);
+	}
+
+	/**
+	 * Returns the number of Peptides in this PeptideSet
+	 * @return number of Peptides
+	 */
+	public int size(){
+		return peptides.size();
+	}
+
+	/**
+	 * Returns the i-th Peptide with [0,size).
+	 * @param i Index of the desired Peptide
+	 * @return i-th Peptide
+	 */
+	public Peptide get(int i){
+		return peptides.get(i);
+	}
+
+	/**
+	 * Returns a Vector with all Peptides in this PeptideSet.
+	 * @return Vector of Peptides
+	 */
+	public Vector<Peptide> getAll() {
+		return peptides;
+	}
+
+	/**
+	 * Reads peptides from a file, either a txt or a mzid. 
+	 * 
+	 * @param filename complete peptide filename
+	 * @throws ExitException
+	 */
+	public void readPeptides(String filename) throws ExitException{
+		// everything without the mzid extension is handled as txt file
+		boolean isMzid = filename.endsWith(".mzid");
+		if (!isMzid){
+			readTxt(filename);
+			return;
+		}
+		readMzIdentML(filename);
+	}
+
+	// parses the peptide txt files
+	// - the first line has to equal the expected header, otherwise (also for an empty file) an ExitException is thrown
+	// - rows which can not be parsed by Peptide are written to the log and skipped
+	// - problems while opening or reading the file end in an ExitException
+	private void readTxt(String filename) throws ExitException{
+		Info info = new Info("read ms peptides");
+		int count = 0;
+
+		BufferedReader pepBuffer = null;
+		try {
+			pepBuffer = new BufferedReader(new FileReader(new File(filename)));
+
+			// check the header; readLine() gives null for an empty file,
+			// so the comparison starts from the expected header to avoid a NullPointerException
+			String tmpheader = pepBuffer.readLine();
+			if (!header.equals(tmpheader)){
+				String message = "peptide file doesn't match expected schema!\nexpected header:\t" + header + "\nyour header:\t\t" + tmpheader;
+				throw new ExitException(message);
+			}
+
+			String line, name = "", desc = "";
+			// readLine() returns null at the end of the file
+			while ((line = pepBuffer.readLine()) != null){
+				count++;
+				try {
+					// each line resp peptide is passed to a new Peptide object, which parses the line
+					Peptide peptide = new Peptide(line);
+					peptides.add(peptide);
+					// some files order peptides by proteins and keep the protein information only for the first peptide with the same protein
+					// so if there is no protein information, "name" and "desc" hopefully have the ones from the prior peptide
+					if (peptide.getProt_acc().matches("")){
+						peptide.setProt_acc(name);
+						peptide.setProt_desc(desc);
+					}
+					// if there are protein informations they are temporarily saved for the next peptides
+					else{
+						name = peptide.getProt_acc();
+						desc = peptide.getProt_desc();
+					}
+					// in some prot_desc are gene names resp. gene symbols which can not be extracted during the Peptide object generation
+					// just cause of the sometimes missing protein information, so its done afterwards
+					peptide.extrGS();
+				} 
+				catch (FormatException e) {
+					String message = "unreadable peptide:\t" + line;
+					Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+				}
+
+				Configurator.checkBreak();
+			}
+		} catch (FileNotFoundException e) {
+			throw new ExitException(e.getMessage());
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		} finally {
+			// release the file also if reading was aborted by an exception
+			if (pepBuffer != null){
+				try {
+					pepBuffer.close();
+				} catch (IOException ignored) {
+					// nothing left to do for a reader which fails on close
+				}
+			}
+		}
+
+		deleteDecoys();
+		correctProt_accs();
+		info.stop(peptides.size(), count);
+	}
+
+	// parses the peptide mzid files
+	// - throws an ExitException if the MzIdentML reader could not load the file
+	// - peptide strings which can not be parsed by Peptide are written to the log and skipped
+	private void readMzIdentML(String filename) throws ExitException{
+		Info info = new Info("read ms peptides");
+		int count = 0;
+
+		// the MzIdentML object handles the mzid file and returns a string per peptide 
+		// in the same format as in the txt files
+		MzIdentML mzreader = new MzIdentML();
+		Vector<String> peps = mzreader.load(filename);
+		if (peps == null){
+			// load gives null if the file could not be read or parsed
+			throw new ExitException("mzIdentML file could not be read: " + filename);
+		}
+
+		for (String pep : peps){
+			try {
+				Peptide peptide = new Peptide(pep);
+				peptides.add(peptide);
+				peptide.extrGS();
+				count++;
+			} 
+			catch (FormatException e) {
+				String message = "unreadable peptide:\t" + pep;
+				Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+			}
+
+			Configurator.checkBreak();
+		}
+
+		deleteDecoys();
+		correctProt_accs();
+		info.stop(count, peps.size());
+	}
+	
+	// deletes all peptides with decoy proteins (when prot_acc start with "RRR")
+	// and writes the number of deleted peptides to the log, if there were any
+	private void deleteDecoys() {
+		Iterator<Peptide> it = peptides.iterator();
+		int old = peptides.size();
+		while (it.hasNext()){
+			Peptide p = it.next();
+			if (p.getProt_acc().startsWith("RRR")){
+				it.remove();
+			}
+		}
+		// ">" instead of ">=": peptides are only removed here, so ">=" was always true
+		if (old > peptides.size()){
+			String message = "deleted " + (old - peptides.size()) + " decoy peptides";
+			Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+		}
+	}
+	
+	// corrects IPI and GI prot_accs by cutting them down to the plain id
+	private void correctProt_accs(){
+		for (Peptide peptide : peptides){
+			String acc = peptide.getProt_acc();
+			// extraction of IPIs (e.g. "IPI:IPI00375560.3"): cut the part behind the first dot, then take the part behind the colon
+			if (acc.startsWith("IPI:")){
+				acc = acc.split("\\.")[0];
+				String[] parts = acc.split(":");
+				if (parts.length > 1){
+					acc = parts[1];
+				}
+			}
+			// extraction of GIs (e.g. "gi|4502027")
+			if (acc.startsWith("gi|")){
+				acc = acc.substring(3);
+			}
+			peptide.setProt_acc(acc);
+		}
+	}
+
+	/**
+	 * <p>This method reads useful protein ids from a uniprot id mapping file.</p>
+	 * 
+	 * <p>The prot_acc (UniProtKB-ID, IPI and GI) is used to find other ids such as UniProtKB-AC, RefSeq and Ensembl_TRS,
+	 * which are usually the reference in the gene annotation files (UCSC knownGene and Ensemble ensGene).</p>
+	 * 
+	 * <p>The file is read as tab separated lines. Columns missing at the end of a line are treated as not available.</p>
+	 * 
+	 * @param filename uniprot id mapping file
+	 * @throws ExitException if the file can not be found or read
+	 */
+	public void readProteinIds(String filename) throws ExitException{
+		Info info = new Info("read protein ids (1)");
+
+		HashMap<String,Vector<Integer>> protmap = getCommonProts("name");
+
+		int lines = 0;
+		int count = 0;
+
+		BufferedReader rsBuffer = null;
+		try {
+			rsBuffer = new BufferedReader(new FileReader(new File(filename)));
+
+			String line;
+			// readLine() returns null at the end of the file
+			while ((line = rsBuffer.readLine()) != null){
+				// split drops empty columns at the end of a line, so every column access is guarded by a length check
+				String[] splits = line.split("\t");
+				boolean countIt = false;
+				lines++;
+
+				// check the UniprotKB-IDs
+				if (splits.length > 1 && assignProteinIds(protmap.get(splits[1]), splits)){
+					countIt = true;
+				}
+				// check the IPIs, might be more than one per line
+				if (splits.length > 7){
+					for (String s : splits[7].split("; ")){
+						if (assignProteinIds(protmap.get(s), splits)){
+							countIt = true;
+						}
+					}
+				}
+				// check the GIs, might be more than one per line
+				if (splits.length > 4){
+					for (String s : splits[4].split("; ")){
+						if (assignProteinIds(protmap.get(s), splits)){
+							countIt = true;
+						}
+					}
+				}
+
+				if (countIt) count++;
+				Configurator.checkBreak();
+			}
+
+		} catch (FileNotFoundException e) {
+			throw new ExitException(e.getMessage());
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		} finally {
+			// release the file also if reading was aborted by an exception
+			if (rsBuffer != null){
+				try {
+					rsBuffer.close();
+				} catch (IOException ignored) {
+					// nothing left to do for a reader which fails on close
+				}
+			}
+		}
+
+		info.stop(count, lines);
+	}
+
+	// Copies the ids of one id mapping line (first column as unique identifier, the RefSeqs and,
+	// if the line has that column, the Ensembl_TRS) to all peptides with the given indices
+	// and marks them as protein assigned.
+	// Returns true if at least one peptide was updated; indices may be null (no peptide for this id).
+	private boolean assignProteinIds(Vector<Integer> indices, String[] splits){
+		if (indices == null){
+			return false;
+		}
+		boolean assigned = false;
+		for (Integer i : indices){
+			Peptide peptide = peptides.get(i);
+			peptide.setUniqueIdentifier(splits[0].split("-")[0]);
+			if (splits.length > 3){
+				for (String ref : splits[3].split("; ")){
+					peptide.addRefSeq(ref.split("\\.")[0]);
+				}
+			}
+			if (splits.length > 20){
+				for (String ens : splits[20].split("; ")){
+					peptide.addEnsembl_TRS(ens);
+				}
+			}
+			peptide.setProteinAssigned(true);
+			assigned = true;
+		}
+		return assigned;
+	}
+
+	/**
+	 * <p>This method tries to read some ids from a uniprot fasta file (whole proteome is recommended), particularly from fasta headers.</p>
+	 * 
+	 * <p>It uses the extracted gene names or gene symbols ({@link Peptide#extrGS()}) to match a potential fasta header and then extracts the UniProtKB-AC.</p>
+	 * 
+	 * <p>It only involves peptides which have no assigned protein already (e.g. after using {@link #readProteinIds(String)}).</p>
+	 * 
+	 * @param filename
+	 * @throws ExitException
+	 */
+	public void readFasta(String filename) throws ExitException{
+
+		Vector<Integer> unmapped = new Vector<Integer>();
+		for (int p=0; p<size(); p++){
+			if (!peptides.get(p).isProteinAssigned()) unmapped.add(p);
+		}
+		if (unmapped.size() == 0) return;
+
+		Info info = new Info("read protein ids (2)");
+		String line;
+		int count = 0;
+		int peps = 0;
+
+		String id;
+		HashMap<String,Vector<Integer>> gnmap = new HashMap<String,Vector<Integer>>();
+		for (int i=0; i<unmapped.size(); i++){
+			id = peptides.get(unmapped.get(i)).getGN();
+			if (!gnmap.containsKey(id))
+				gnmap.put(id, new Vector<Integer>());
+			gnmap.get(id).add(unmapped.get(i));
+		}
+
+		String label = "GN=";
+		String gn;
+		int pos, from, to;
+
+		try {
+			BufferedReader protBuffer = new BufferedReader(new FileReader(new File(filename)));
+
+			while (protBuffer.ready()){
+				line = protBuffer.readLine();
+				if (line.startsWith(">")){
+					peps++;
+					pos = line.indexOf(label);
+					if (pos >= 0){
+						from = pos + label.length();
+						to = line.indexOf(" ", from);
+						if (to >= 0){
+							gn =  line.substring(from, to);
+						}
+						else{
+							gn =  line.substring(from);
+						}
+						if (gnmap.containsKey(gn)){
+							for (Integer i : gnmap.get(gn)){
+								if (!peptides.get(i).isProteinAssigned()){
+									peptides.get(i).setUniqueIdentifier(line.split("\\|")[1].split("-")[0]);
+									peptides.get(i).setProteinAssigned(true);
+									count++;
+								}
+							}
+						}
+					}
+				}	
+				Configurator.checkBreak();
+			}
+			protBuffer.close();
+
+		} catch (FileNotFoundException e) {
+			throw new ExitException(e.getMessage());
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		}
+
+		info.stop(count, peps);
+	}
+
+	/**
+	 * Providing a HashMap linking peptides to their protein, where a protein identifier is the key and a list of peptides the value.
+	 * 
+	 * @param ref Parameter to choose the protein identifier, "name" for the protein name (UniProtKB-ID) and "id" for protein ids (UniProtKB-AC and RefSeq).
+	 * @return HashMap with proteins as keys and Vectors of peptides as values
+	 */
+	public HashMap<String, Vector<Integer>> getCommonProts(String ref){
+		if (!ref.equals("name") && !ref.equals("id")){
+			return null;
+		}
+		HashMap<String,Vector<Integer>> protToPeps = new HashMap<String,Vector<Integer>>();
+		for (int pepIndex=0; pepIndex<peptides.size(); pepIndex++){
+			for (String prot : peptides.get(pepIndex).getProt(ref)){
+				Vector<Integer> members = protToPeps.get(prot);
+				if (members == null){
+					members = new Vector<Integer>();
+					protToPeps.put(prot, members);
+				}
+				members.add(pepIndex);
+			}
+		}
+		// peptide indices filed under the empty string are discarded
+		protToPeps.remove("");
+		return protToPeps;
+	}
+
+	/**
+	 * <p>Writes all mapped Peptides to txt files with same format as the peptide input txt files but extended by the fields
+	 * "chrom", "strand", "start_pos", "stop_pos" and "shared". They provide the genome position of the peptides.</p>
+	 * 
+	 * <p>As a peptide might have several matches, it is exported in several lines, each with the same peptide information but with other positions.</p>
+	 * 
+	 * <p>There will be two output files regarding the two matching methods (see {@link Mapper}).</p>
+	 * 
+	 * @param outputPath the path where the files should be written
+	 * @param msPepSetName a name for the peptides output, which is used as part of the filename
+	 * @throws ExitException
+	 */
+	public void writeMapped(String outputPath, String msPepSetName) throws ExitException{
+		Info info = new Info("write map peptides");
+		int count = 0;
+
+		String sep = "\t";
+
+		try {
+			BufferedWriter pepBuffer = new BufferedWriter(new FileWriter(new File(outputPath + msPepSetName + "_annomapped.txt")));
+			BufferedWriter pepBuffer2 = new BufferedWriter(new FileWriter(new File(outputPath + msPepSetName + "_altmapped.txt")));
+
+			pepBuffer.write(header + sep + "chrom" + sep + "strand" + sep
+					+ "start_pos" + sep + "stop_pos" + sep + "shared");
+			pepBuffer.newLine();
+
+			pepBuffer2.write(header + sep + "chrom" + sep + "strand" + sep
+					+ "start_pos" + sep + "stop_pos" + sep + "shared");
+			pepBuffer2.newLine();
+
+			Peptide pep;
+			for (int i=0; i<peptides.size(); i++){
+				if (peptides.get(i).isGeneMapped()){
+					pep = peptides.get(i);
+					for (Position pos : pep.getPositions()){
+						String line = pep.getRow() + sep
+								+ pos.getChrom() + sep + pos.getStrand() + sep
+								+ posToString(pos.getStartPos()) + sep + posToString(pos.getEndPos())
+								+ sep + pep.getPositions().size();
+						if (pep.getMatchMethod().matches(Mapper.ANNOTATION)){
+							pepBuffer.write(line);
+							pepBuffer.newLine();
+						}
+						if (pep.getMatchMethod().matches(Mapper.ALTERNATIVE)){
+							pepBuffer2.write(line);
+							pepBuffer2.newLine();
+						}
+					}
+					count++;
+				}
+				Configurator.checkBreak();
+			}
+
+			pepBuffer.close();
+			pepBuffer2.close();
+		} catch (IOException e) {
+			System.out.println("\nERROR: " + e.getMessage());
+		}
+
+		info.stop(count, peptides.size());
+	}
+
+	/**
+	 * <p>Writes all unmapped Peptides to txt files with same format as the peptide input txt files but extended by the field
+	 * "maperror", which tries to indicate what is missing for the mapping.</p>
+	 * 
+	 * @param outputPath the path where the file should be written
+	 * @param msPepSetName a name for the peptides output, which is used as part of the filename
+	 * @throws ExitException
+	 */
+	public void writeUnmapped(String outputPath, String msPepSetName) throws ExitException{
+		Info info = new Info("write left peptides");
+		int noprot = 0;
+		int nogene = 0;
+		int noalign = 0;
+
+		String sep = "\t";
+
+		try {
+			BufferedWriter pepBuffer = new BufferedWriter(new FileWriter(new File(outputPath + msPepSetName + "_unmapped.txt")));
+
+			pepBuffer.write(header + sep + "maperror");
+			pepBuffer.newLine();
+
+			for (int i=0; i<peptides.size(); i++){
+				StringBuffer line = new StringBuffer(peptides.get(i).getRow());
+				if (!peptides.get(i).isProteinAssigned() && !peptides.get(i).isGeneAssigned() && !peptides.get(i).isGeneMapped()){
+					line.append(sep + "noProt");
+					pepBuffer.write(line.toString());
+					pepBuffer.newLine();
+					noprot++;
+				}
+				if (peptides.get(i).isProteinAssigned() && !peptides.get(i).isGeneAssigned() && !peptides.get(i).isGeneMapped()){
+					line.append(sep + "noGene_(prots:" + peptides.get(i).getProt("id") + ")");
+					pepBuffer.write(line.toString());
+					pepBuffer.newLine();
+					nogene++;
+				}
+				if (peptides.get(i).isProteinAssigned() && peptides.get(i).isGeneAssigned() && !peptides.get(i).isGeneMapped()){
+					line.append(sep + "noMatch_(prots:" + peptides.get(i).getProt("id") + ",genes:" + peptides.get(i).getGenes() + ")");
+					pepBuffer.write(line.toString());
+					pepBuffer.newLine();
+					noalign++;
+				}
+				Configurator.checkBreak();
+			}
+
+			pepBuffer.close();
+		} catch (IOException e) {
+			System.out.println("\nERROR: " + e.getMessage());
+		}
+
+		info.stop(noprot+nogene+noalign, peptides.size());
+		System.out.println("(no prots: " + noprot + ",no genes: " + nogene + ",no matches: " + noalign + ")");
+	}
+
+	// makes a String representation with comma separation of a position vector
+	private String posToString(Vector<Integer> pos){
+		StringBuffer sb = new StringBuffer();
+		for (int i : pos){
+			sb.append(i + ",");
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Returns a HashMap indicating which Genes the mapped Peptides have in common.
+	 * So the key is a gene name and the value is a list of Peptide indices.
+	 * @return a HashMap describing Gene Peptide connections
+	 */
+	public HashMap<String, Vector<Integer>> getCommonGenes() {
+		HashMap<String,Vector<Integer>> commonGenes = new HashMap<String,Vector<Integer>>();
+		String id;
+		Peptide pep;
+
+		for (int i=0; i<peptides.size(); i++){
+			pep = peptides.get(i);
+			for (Position pos : pep.getPositions()){
+				id = pos.getGeneName();
+				if (commonGenes.containsKey(id)){
+					commonGenes.get(id).add(i);
+				}
+				else{
+					commonGenes.put(id, new Vector<Integer>());
+					commonGenes.get(id).add(i);
+				}
+			}
+		}
+
+		return commonGenes;
+	}
+
+	/**
+	 * Returns the indices of Peptides which are not mapped yet.
+	 * @return Unmapped Peptides indices
+	 */
+	public Vector<Integer> getUnmapped() {
+		Vector<Integer> unmapped = new Vector<Integer>();
+		for (int p=0; p<size(); p++){
+			if (!peptides.get(p).isGeneMapped()) unmapped.add(p);
+		}
+		return unmapped;
+	}
+
+	/**
+	 * <p>Returns a list of indices for a subset of Peptides depending on the "unique" property.</p>
+	 * 
+	 * <p>Parameters can be either "unique" (returns Peptides with unique value "1"),
+	 * "nonunique" (returns Peptides with unique value "0") or "rest" (returns Peptides with a unique value other than "1" or "0").</p>
+	 * 
+	 * @param param either "unique", "nonunique" or "rest", anything else yields an empty Vector
+	 * @return Vector with Peptides indices
+	 */
+	public Vector<Integer> getUnique(String param) {
+		Vector<Integer> unique = new Vector<Integer>();
+
+		if (param.matches("unique")){
+			for (int p=0; p<size(); p++){
+				if (peptides.get(p).getPep_isunique().matches("1")) unique.add(p);
+			}
+		}
+		if (param.matches("nonunique")){
+			for (int p=0; p<size(); p++){
+				if (peptides.get(p).getPep_isunique().matches("0")) unique.add(p);
+			}
+		}
+		if (param.matches("rest")){
+			for (int p=0; p<size(); p++){
+				if (!peptides.get(p).getPep_isunique().matches("1") && !peptides.get(p).getPep_isunique().matches("0"))
+					unique.add(p);
+			}
+		}
+
+		return unique;
+	}
+
+	// A Comparator to sort Peptides according to query, sequence and score
+	private class PeptideComparator implements Comparator<Peptide> {
+		@Override
+		public int compare(Peptide pep0, Peptide pep1) {
+			Integer query0 = Integer.parseInt(pep0.getPep_query());
+			Integer query1 = Integer.parseInt(pep1.getPep_query());
+			int result = query0.compareTo(query1);
+			if (result == 0){
+				String seq0 = pep0.getSequence();
+				String seq1 = pep1.getSequence();
+				result = seq0.compareTo(seq1);
+				if (result == 0){
+					Double score0 = Double.parseDouble(pep0.getPep_Score());
+					Double score1 = Double.parseDouble(pep1.getPep_Score());
+					result = -1 * score0.compareTo(score1);
+				}
+			}
+			return result;
+		}
+	}
+
+	/**
+	 * This method sorts the Peptides to group them according to the pep_query
+	 * and gives their Positions a unique String (modifier) each (e.g. "aaa", "aab", "aac").
+	 * So in the end each Position will have a unique identifier (pep_query + modifier) for the outputs.
+	 */
+	public void sortPeptides(){
+		if (peptides.isEmpty()) return; // nothing to sort or label; get(0) below would throw
+		Collections.sort(peptides, new PeptideComparator());
+		Vector<Peptide> peps = new Vector<Peptide>(); // current run of peptides sharing one pep_query
+		peps.add(peptides.get(0));
+		for (int i=1; i<peptides.size(); i++){
+			if (peps.lastElement().getPep_query().matches(peptides.get(i).getPep_query())){
+				peps.add(peptides.get(i));
+			}
+			else{
+				modify(peps); // the run is complete: give its Positions their modifiers
+				peps.clear();
+				peps.add(peptides.get(i));
+			}
+		}
+		modify(peps); // the last run is not followed by a differing query, so handle it here
+	}
+
+	// add a unique modifier String to each Position in the passed Peptides
+	private void modify(Vector<Peptide> peps){
+		int positions = 0;
+		for (Peptide pep : peps){
+			positions += pep.getPositions().size();
+		}
+
+		if (positions>1){
+			int digits = (int) Math.ceil(Math.log(positions)/Math.log(26));
+			char[] mod = new char[digits];
+			Arrays.fill(mod, 'a');
+			for (Peptide pep : peps){
+				for (Position pos : pep.getPositions()){
+					pos.setModifier(new String(mod));
+
+					mod[digits-1]++;
+					for (int m=mod.length-1; m>0; m--){
+						if (mod[m] == 'a'+26){
+							mod[m] = 'a';
+							mod[m-1]++;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Counts the number of Positions (!) of peptides which got mapped to a gene.
+	 * 
+	 * @return Number of Positions of mapped peptides.
+	 */
+	public int mapPosCount(){
+		int count = 0;	
+		for (Peptide pep : peptides){
+			count += pep.getPositions().size();
+		}
+		return count;
+	}
+
+	/**
+	 * Counts the number of Peptides (!) which got mapped to a gene by a given method.
+	 * 
+	 * @param method 
+	 * @return Number of mapped peptides.
+	 */
+	public int mapPepCount(String method){
+		int count = 0;	
+		for (Peptide pep : peptides){
+			if (pep.getMatchMethod() != null && pep.getMatchMethod().matches(method))
+				count ++;
+		}
+		return count;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/exceptions/ExitException.java b/src/de/rki/ng4/ipig/exceptions/ExitException.java
new file mode 100644
index 0000000..4f31573
--- /dev/null
+++ b/src/de/rki/ng4/ipig/exceptions/ExitException.java
@@ -0,0 +1,21 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.exceptions;
+
+/**
+ * A simple exception used to exit the program. It's usually thrown up to the main class.
+ * 
+ * @author Mathias Kuhring
+ */
+public class ExitException extends Exception {
+
+	private static final long serialVersionUID = 1L;
+
+	// hands the given text to Exception, so getMessage() returns it
+	public ExitException(String message) {
+		super(message);
+	}
+}
diff --git a/src/de/rki/ng4/ipig/exceptions/FormatException.java b/src/de/rki/ng4/ipig/exceptions/FormatException.java
new file mode 100644
index 0000000..5b6d98e
--- /dev/null
+++ b/src/de/rki/ng4/ipig/exceptions/FormatException.java
@@ -0,0 +1,21 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.exceptions;
+
+/**
+ * A simple exception used for any kind of format failure in input data.
+ * 
+ * @author Mathias Kuhring
+ */
+public class FormatException extends Exception {
+
+	private static final long serialVersionUID = 1L;
+
+	// hands the given text to Exception, so getMessage() returns it
+	public FormatException(String message) {
+		super(message);
+	}
+}
diff --git a/src/de/rki/ng4/ipig/gui/Downloader.java b/src/de/rki/ng4/ipig/gui/Downloader.java
new file mode 100644
index 0000000..4078108
--- /dev/null
+++ b/src/de/rki/ng4/ipig/gui/Downloader.java
@@ -0,0 +1,988 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.gui;
+
+import java.awt.Color;
+import java.awt.EventQueue;
+import java.awt.Rectangle;
+import java.awt.Toolkit;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.WindowAdapter;
+import java.awt.event.WindowEvent;
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.net.Authenticator;
+import java.net.MalformedURLException;
+import java.net.PasswordAuthentication;
+import java.text.DecimalFormat;
+import java.util.Vector;
+
+import javax.swing.AbstractButton;
+import javax.swing.JButton;
+import javax.swing.JCheckBox;
+import javax.swing.JFileChooser;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JList;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JProgressBar;
+import javax.swing.JScrollPane;
+import javax.swing.JTextField;
+import javax.swing.ListSelectionModel;
+import javax.swing.UIManager;
+import javax.swing.border.TitledBorder;
+import javax.swing.event.ListSelectionEvent;
+import javax.swing.event.ListSelectionListener;
+
+import org.apache.commons.compress.archivers.ArchiveException;
+import org.apache.commons.compress.archivers.ArchiveStreamFactory;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.commons.net.ftp.FTPClient;
+
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.tools.Configurator;
+
+/**
+ * <p>The Downloader is a gui which helps downloading necessary data for iPiG.</p>
+ * 
+ * <p>It provides a list of available organism and data which can be selected (loaded from file).
+ * Data is downloaded with use of an ftp library (hence, links must be from an ftp server).
+ * Optionally, the downloaded archives can be extracted directly.</p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class Downloader {
+
+	private JFrame frame;
+	private JProgressBar progressBar;
+
+	private JButton btnStart;
+	private JButton btnStop;
+	private JButton btnSettings;
+
+	private JCheckBox chckbxUseProxy;
+
+	private OutputJTextArea outputWindow;
+
+	private Loader loader;
+
+	// Temporary configuration file to save settings
+	private final String conf = "dl.conf";
+
+	private Vector<Organism> orgs;
+	private JScrollPane scrollPane_2;
+	private JList orglist;
+	private JTextField tf_outputPath;
+	private JButton btnOutputPath;
+	private JLabel lbl_outputPath;
+	private JCheckBox chckbxExtractFiles;
+	private JCheckBox chckbxAminoAcidSequences;
+	private JCheckBox chckbxIdMappings;
+	private JCheckBox chckbxGenome;
+	private JCheckBox chckbxProteome;
+	private JCheckBox chckbxGeneAnnotations;
+	private JButton btnExit;
+	private AbstractButton btnHelp;
+	private JCheckBox chckbxFtpStatus;
+
+	/**
+	 * Launch the application.
+	 */
+	public static void main(String[] args) {
+		EventQueue.invokeLater(new Runnable() {
+			public void run() {
+				try {
+					Downloader window = new Downloader();
+					window.frame.setVisible(true);
+				} catch (Exception e) {
+					e.printStackTrace();
+				}
+			}
+		});
+	}
+
+	/**
+	 * Create the application.
+	 */
+	public Downloader() {
+		// try native look and feel
+		try {
+			UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+		} catch(Exception e) {
+			System.out.println("Error setting native LAF: " + e);
+		}
+
+		orgs = loadOrganisms();
+		initialize();
+		updateFields();
+	}
+
+	/**
+	 * Initialize the contents of the frame.
+	 */
+	private void initialize() {
+		frame = new JFrame();
+		frame.addWindowListener(new WindowAdapter() {
+			@Override
+			public void windowClosing(WindowEvent arg0) {
+				saveConfig();
+			}
+		});
+		frame.setIconImage(Toolkit.getDefaultToolkit().getImage(Downloader.class.getResource("/images/rki.png")));
+		frame.setResizable(false);
+		frame.setTitle("Downloader GUI");
+		frame.setBounds(100, 100, 500, 600);
+		frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+		frame.getContentPane().setLayout(null);
+
+		progressBar = new JProgressBar();
+		progressBar.setBounds(10, 394, 473, 19);
+		frame.getContentPane().add(progressBar);
+
+		outputWindow = new OutputJTextArea();
+		outputWindow.setEditable(false);
+		outputWindow.setSize(473, 157);
+		outputWindow.setLocation(10, 400);
+
+		JScrollPane scrollPane = new JScrollPane(outputWindow);
+		scrollPane.setBounds(10, 424, 473, 133);
+		frame.getContentPane().add(scrollPane);
+
+		JPanel panelOptions = new JPanel();
+		panelOptions.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Options", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		panelOptions.setBounds(10, 284, 474, 45);
+		frame.getContentPane().add(panelOptions);
+		panelOptions.setLayout(null);
+
+		btnSettings = new JButton("Settings");
+		btnSettings.setEnabled(false);
+		btnSettings.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				ProxyDialog.run();
+			}
+		});
+		btnSettings.setBounds(389, 14, 75, 23);
+		panelOptions.add(btnSettings);
+
+		chckbxUseProxy = new JCheckBox("Use proxy");
+		chckbxUseProxy.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxUseProxy.isSelected()){
+					btnSettings.setEnabled(true);
+					Configurator.setProperty("proxyUse", "true");
+				}
+				else{
+					btnSettings.setEnabled(false);
+					Configurator.setProperty("proxyUse", "false");
+				}
+			}
+		});
+		chckbxUseProxy.setSelected(Boolean.parseBoolean(Configurator.getProperty("proxyUse", "false")));
+		chckbxUseProxy.setBounds(301, 14, 75, 23);
+		panelOptions.add(chckbxUseProxy);
+
+		chckbxExtractFiles = new JCheckBox("Extract files after download");
+		chckbxExtractFiles.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxExtractFiles.isSelected()){
+					Configurator.setProperty("extractFiles", "true");
+				}
+				else{
+					Configurator.setProperty("extractFiles", "false");
+				}
+			}
+		});
+		chckbxExtractFiles.setSelected(Boolean.parseBoolean(Configurator.getProperty("extractFiles", "true")));
+		chckbxExtractFiles.setBounds(6, 14, 159, 23);
+		panelOptions.add(chckbxExtractFiles);
+
+		chckbxFtpStatus = new JCheckBox("Display ftp status");
+		chckbxFtpStatus.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxFtpStatus.isSelected()){
+					Configurator.setProperty("ftpStatus", "true");
+				}
+				else{
+					Configurator.setProperty("ftpStatus", "false");
+				}
+			}
+		});
+		chckbxFtpStatus.setSelected(Boolean.parseBoolean(Configurator.getProperty("ftpStatus", "false")));
+		chckbxFtpStatus.setBounds(181, 14, 109, 23);
+		panelOptions.add(chckbxFtpStatus);
+
+
+		JPanel panelSaveFolder = new JPanel();
+		panelSaveFolder.setBorder(new TitledBorder(null, "Select Save Folder", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		panelSaveFolder.setBounds(9, 213, 474, 60);
+		frame.getContentPane().add(panelSaveFolder);
+
+		JPanel panelOrganisms = new JPanel();
+		panelOrganisms.setBorder(new TitledBorder(null, "Select Organism", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		panelOrganisms.setBounds(10, 11, 473, 135);
+		panelOrganisms.setLayout(null);
+		frame.getContentPane().add(panelOrganisms);
+
+		orglist = new JList(getOrgList());
+		orglist.addListSelectionListener(new ListSelectionListener() {
+			public void valueChanged(ListSelectionEvent arg0) {
+				changeDataCheckboxes(true);
+			}
+		});
+		orglist.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
+
+		scrollPane_2 = new JScrollPane(orglist);
+		scrollPane_2.setBounds(10, 16, 453, 105);
+		panelOrganisms.add(scrollPane_2);
+		panelSaveFolder.setLayout(null);
+
+		JPanel panel_outputPath = new JPanel();
+		panel_outputPath.setOpaque(false);
+		panel_outputPath.setBounds(new Rectangle(236, 27, 1, 1));
+		panel_outputPath.setLayout(null);
+		panel_outputPath.setBounds(10, 14, 455, 36);
+		panelSaveFolder.add(panel_outputPath);
+
+		tf_outputPath = new JTextField();
+		tf_outputPath.setEditable(false);
+		tf_outputPath.setText("");
+		tf_outputPath.setColumns(10);
+		tf_outputPath.setBounds(0, 14, 356, 20);
+		panel_outputPath.add(tf_outputPath);
+
+		btnOutputPath = new JButton("Open");
+		btnOutputPath.setBounds(366, 13, 89, 23);
+		btnOutputPath.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = choosePath(new File(tf_outputPath.getText()));
+				if (file != null){
+					Configurator.setProperty("outputPath",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		panel_outputPath.add(btnOutputPath);
+
+		lbl_outputPath = new JLabel("Output Path:");
+		lbl_outputPath.setBounds(0, 0, 374, 14);
+
+		panel_outputPath.add(lbl_outputPath);
+
+		JPanel panelData = new JPanel();
+		panelData.setBorder(new TitledBorder(null, "Select Data", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		panelData.setBounds(10, 157, 474, 45);
+		frame.getContentPane().add(panelData);
+		panelData.setLayout(null);
+
+		chckbxGeneAnnotations = new JCheckBox("Gene Annotations");
+		chckbxGeneAnnotations.setEnabled(false);
+		chckbxGeneAnnotations.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxGeneAnnotations.isSelected()){
+					Configurator.setProperty("loadGenes", "true");
+				}
+				else{
+					Configurator.setProperty("loadGenes", "false");
+				}
+			}
+		});
+		chckbxGeneAnnotations.setSelected(Boolean.parseBoolean(Configurator.getProperty("loadGenes", "true")));
+		chckbxGeneAnnotations.setBounds(6, 14, 111, 23);
+		panelData.add(chckbxGeneAnnotations);
+
+		chckbxAminoAcidSequences = new JCheckBox("Amino Acid Seqs.");
+		chckbxAminoAcidSequences.setEnabled(false);
+		chckbxAminoAcidSequences.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxAminoAcidSequences.isSelected()){
+					Configurator.setProperty("loadAaseqs", "true");
+				}
+				else{
+					Configurator.setProperty("loadAaseqs", "false");
+				}
+			}
+		});
+		chckbxAminoAcidSequences.setSelected(Boolean.parseBoolean(Configurator.getProperty("loadAaseqs", "true")));
+		chckbxAminoAcidSequences.setBounds(119, 14, 111, 23);
+		panelData.add(chckbxAminoAcidSequences);
+
+		chckbxIdMappings = new JCheckBox("ID Mappings");
+		chckbxIdMappings.setEnabled(false);
+		chckbxIdMappings.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxIdMappings.isSelected()){
+					Configurator.setProperty("loadIdmappings", "true");
+				}
+				else{
+					Configurator.setProperty("loadIdmappings", "false");
+				}
+			}
+		});
+		chckbxIdMappings.setSelected(Boolean.parseBoolean(Configurator.getProperty("loadIdmappings", "true")));
+		chckbxIdMappings.setBounds(232, 14, 85, 23);
+		panelData.add(chckbxIdMappings);
+
+		chckbxProteome = new JCheckBox("Proteome");
+		chckbxProteome.setEnabled(false);
+		chckbxProteome.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxProteome.isSelected()){
+					Configurator.setProperty("loadProts", "true");
+				}
+				else{
+					Configurator.setProperty("loadProts", "false");
+				}
+			}
+		});
+		chckbxProteome.setSelected(Boolean.parseBoolean(Configurator.getProperty("loadProts", "true")));
+		chckbxProteome.setBounds(319, 14, 71, 23);
+		panelData.add(chckbxProteome);
+
+		chckbxGenome = new JCheckBox("Genome");
+		chckbxGenome.setEnabled(false);
+		chckbxGenome.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				if (chckbxGenome.isSelected()){
+					Configurator.setProperty("loadChroms", "true");
+				}
+				else{
+					Configurator.setProperty("loadChroms", "false");
+				}
+			}
+		});
+		chckbxGenome.setSelected(Boolean.parseBoolean(Configurator.getProperty("loadChroms", "true")));
+		chckbxGenome.setBounds(392, 14, 65, 23);
+		panelData.add(chckbxGenome);
+
+		JPanel panelDownload = new JPanel();
+		panelDownload.setBorder(new TitledBorder(null, "Download", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		panelDownload.setLayout(null);
+		panelDownload.setBounds(10, 340, 147, 44);
+		frame.getContentPane().add(panelDownload);
+
+		JPanel panel_1 = new JPanel();
+		panel_1.setLayout(null);
+		panel_1.setBounds(6, 14, 135, 23);
+		panelDownload.add(panel_1);
+
+		btnStart = new JButton("Start");
+		btnStart.setBounds(0, 0, 65, 23);
+		panel_1.add(btnStart);
+		btnStart.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				loader = new Loader();
+				loader.start();
+			}
+		});
+
+		btnStop = new JButton("Stop");
+		btnStop.setBounds(70, 0, 65, 23);
+		panel_1.add(btnStop);
+		btnStop.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				if (loader != null && loader.isAlive()){
+					Configurator.setBreak(true);
+					btnStop.setEnabled(false);
+				}
+				else{
+					enableButtons(true);
+					Configurator.setBreak(false);
+					outputWindow.redirectSystemStreams(false, false);
+				}	
+			}
+		});
+		btnStop.setEnabled(false);
+
+		JPanel panelGeneral = new JPanel();
+		panelGeneral.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "General", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		panelGeneral.setLayout(null);
+		panelGeneral.setBounds(336, 339, 147, 44);
+		frame.getContentPane().add(panelGeneral);
+
+		JPanel panel_3 = new JPanel();
+		panel_3.setLayout(null);
+		panel_3.setOpaque(false);
+		panel_3.setBounds(6, 14, 135, 23);
+		panelGeneral.add(panel_3);
+
+		btnHelp = new JButton("Help");
+		btnHelp.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				try {
+					String help = "/help/downloader_help.txt";
+					InputStream is = Downloader.class.getResourceAsStream(help);
+					BufferedReader helpbf = new BufferedReader(new InputStreamReader(is));
+					outputWindow.setText("");
+					while (helpbf.ready()){
+						outputWindow.append(helpbf.readLine() + "\n");
+					}
+					outputWindow.setCaretPosition(0); 
+					is.close();
+				} catch (IOException e) {
+					outputWindow.append("error: " + e.getLocalizedMessage());
+				}
+			}
+		});
+		btnHelp.setBounds(0, 0, 65, 23);
+		panel_3.add(btnHelp);
+
+		btnExit = new JButton("Exit");
+		btnExit.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				saveConfig();
+				System.exit(0);
+			}
+		});
+		btnExit.setBounds(70, 0, 65, 23);
+		panel_3.add(btnExit);
+	}
+
+	// a thread running the download and controlling the output and buttons meanwhile
+	class Loader extends Thread{
+		@Override public void run(){
+			Configurator.setBreak(false);
+			enableButtons(false);
+			outputWindow.setText("");
+			outputWindow.redirectSystemStreams(true, true);
+
+			try {
+				load();
+			} catch (ExitException e) {
+				System.out.println("\n" + e.getMessage());
+			} catch (Exception e) {
+				System.out.println(e.getMessage());
+			}
+
+			outputWindow.redirectSystemStreams(false, false);
+			System.gc();
+			enableButtons(true);
+			Configurator.setBreak(false);
+		}
+	}
+
+	// enabling/disabling of buttons for running the downloads
+	// stop button is always set contradictory
+	private void enableButtons(boolean b){
+		btnStart.setEnabled(b);
+		btnStop.setEnabled(!b);
+
+		btnOutputPath.setEnabled(b);
+
+		orglist.setEnabled(b);
+
+		btnHelp.setEnabled(b);
+		btnExit.setEnabled(b);
+
+		chckbxExtractFiles.setEnabled(b);
+		chckbxFtpStatus.setEnabled(b);
+		chckbxUseProxy.setEnabled(b);
+		btnSettings.setEnabled(b && chckbxUseProxy.isSelected());
+
+		changeDataCheckboxes(b);
+	}
+
+	// Enables/disables the data checkboxes. A checkbox is only enabled when b is true, an organism
+	// is selected and that organism offers a link for the respective data type.
+	// Without a selection getSelectedIndex() is -1; in that case all data checkboxes are disabled
+	// instead of calling orgs.get(-1), which throws an ArrayIndexOutOfBoundsException.
+	private void changeDataCheckboxes(boolean b){
+		int selected = orglist.getSelectedIndex();
+		Organism org = (selected >= 0 && selected < orgs.size()) ? orgs.get(selected) : null;
+		boolean usable = b && org != null;
+
+		chckbxGeneAnnotations.setEnabled(usable && org.isGenesAvailable());
+		chckbxAminoAcidSequences.setEnabled(usable && org.isAaseqsAvailable());
+		chckbxIdMappings.setEnabled(usable && org.isIdmappingsAvailable());
+		chckbxProteome.setEnabled(usable && org.isProtsAvailable());
+		chckbxGenome.setEnabled(usable && org.isChromsAvailable());
+	}
+
+	// Function run by the download thread (Loader). Downloads every link of the selected organism into
+	// the configured output path and, if the property "extractFiles" is set, extracts the local files.
+	// For an already existing file the user is asked to overwrite/skip/cancel; a skipped download is
+	// still handed to the extraction step. Files without an extension are not extracted.
+	// Throws ExitException when the user cancels a dialog (a stop request is presumably raised by
+	// Configurator.checkBreak() the same way).
+	private void load() throws ExitException, MalformedURLException {
+		if (isSelectionMissing()) return;
+
+		Organism org = orgs.get(orglist.getSelectedIndex());
+		File outputDir = new File(Configurator.getProperty("outputPath","")).getAbsoluteFile();
+		Vector<File> localFiles = new Vector<File>();
+
+		for (String link : org.getLinks()){
+			Configurator.checkBreak();
+			// lastIndexOf returns -1 for a link without '/', so "+ 1" then keeps the whole string
+			File file = new File(outputDir, link.substring(link.lastIndexOf('/') + 1));
+			System.out.println("initializing download of " + file.getName());
+
+			boolean write = !file.exists() || questionDialog(file.getName());
+			boolean loaded = write && downloadFile(link, file.getAbsolutePath());
+			if (loaded || !write) localFiles.add(file);
+		}
+
+		if (Boolean.parseBoolean(Configurator.getProperty("extractFiles", "true"))){
+			for (File localFile : localFiles){
+				Configurator.checkBreak();
+				String name = localFile.getName();
+				System.out.println("initializing extraction of " + name);
+
+				// look for the extension in the file name only: a dot in a folder name must not count,
+				// and a name without any dot has nothing to strip (substring(0, -1) would throw)
+				int dot = name.lastIndexOf('.');
+				if (dot < 0){
+					System.out.println(" skipped: " + name + " has no archive extension");
+					continue;
+				}
+
+				File outfile = new File(localFile.getParentFile(), name.substring(0, dot));
+				if (!outfile.exists() || questionDialog(outfile.getName())){
+					unpack(localFile);
+				}
+			}
+		}
+
+		System.out.println("done");
+	}
+
+	// Downloads one file from an ftp server. The link has the form "<host>/<remote path>": everything
+	// before the first '/' is the server, the rest is the remote file. User is always "anonymous".
+	// Proxy settings are taken from the Configurator, if necessary.
+	// link:     download link as described above
+	// filename: path of the local file to write
+	// Returns true if the transfer was completed and confirmed by the server, false if the connection
+	// or the transfer failed (the reason is printed). The ftp connection is closed in every case.
+	// Throws ExitException if the proxy port is invalid (a stop request during the transfer is
+	// presumably raised by Configurator.checkBreak() the same way).
+	private boolean downloadFile(String link, String filename) throws ExitException {
+		boolean useProxy = Boolean.parseBoolean(Configurator.getProperty("proxyUse", "false"));
+		initProxy(useProxy);
+		if (useProxy) initAuthenticator();
+		else Authenticator.setDefault(null);
+
+		int slash = link.indexOf('/');
+		if (slash < 0){
+			System.out.println(" can not download file:\n  no remote path in link " + link);
+			return false;
+		}
+		String hostname = link.substring(0, slash);
+		String remote = link.substring(slash);
+
+		// FTP settings
+		boolean status = Boolean.parseBoolean(Configurator.getProperty("ftpStatus", "false"));
+		FTPClient ftp = new FTPClient();
+		try {
+			ftp.connect(hostname);
+			ftp.enterLocalPassiveMode();
+			if( status ) { System.out.print( " ftp status: " + ftp.getReplyString() ); }
+
+			boolean ftpOk = ftp.login("anonymous","my at mail.address");
+			if( status ) { System.out.print( " ftp status: " + ftp.getReplyString() ); }
+
+			ftpOk &= ftp.setFileType(FTPClient.BINARY_FILE_TYPE);
+			if( status ) { System.out.print( " ftp status: " + ftp.getReplyString() ); }
+
+			if (!ftpOk){
+				System.out.println("connection to ftp server refused");
+				return false;
+			}
+
+			// the remote size is only needed for the progress bar, so a failure here is not fatal
+			double max = 0;
+			try {
+				max = ftp.listFiles(remote)[0].getSize();
+			}
+			catch (Exception e){
+				System.out.println( " file size not received -> progress not available!" );
+			}
+
+			InputStream is = ftp.retrieveFileStream(remote);
+			if( status ) { System.out.print( " ftp status: " + ftp.getReplyString() ); }
+			if (is == null){
+				// a refused transfer is signalled by null, not by an exception
+				System.out.println(" can not download file:\n  " + ftp.getReplyString());
+				return false;
+			}
+
+			FileOutputStream out = null;
+			try{
+				// opened only now, so an existing local file is not truncated for a transfer that never starts
+				out = new FileOutputStream(filename);
+				progressBar.setValue(0);
+				progressBar.setMaximum(Integer.MAX_VALUE);
+				System.out.println(" waiting for file (" + (new DecimalFormat("###0.00")).format(max/1024/1024) + " MB)");
+
+				byte[] buffer = new byte[1024];
+				long received = 0;
+				int n;
+				while ((n = is.read(buffer)) != -1){
+					out.write(buffer, 0, n);
+					received += n;
+					// without a known size there is nothing to scale against (avoids a division by zero)
+					if (max > 0){
+						progressBar.setValue((int) Math.min(Integer.MAX_VALUE, received / max * Integer.MAX_VALUE));
+					}
+					Configurator.checkBreak();
+				}
+				progressBar.setValue(progressBar.getMaximum());
+			}
+			finally{
+				try {
+					is.close();
+				}
+				finally {
+					if (out != null) out.close();
+				}
+			}
+
+			// the server reports success or failure of the finished transfer only at this point
+			if (!ftp.completePendingCommand()){
+				System.out.println(" can not download file:\n  " + ftp.getReplyString());
+				return false;
+			}
+			return true;
+		} catch (IOException e) {
+			// also covers MalformedURLException and FileNotFoundException
+			System.out.println(" can not download file:\n  " + e.getMessage());
+			return false;
+		} finally {
+			if (ftp.isConnected()){
+				try {
+					ftp.disconnect();
+				} catch (IOException ignored) {
+					// the connection is being dropped anyway
+				}
+			}
+		}
+	}
+
+	// Saves the current configuration via Configurator.saveProperties(conf). It's used when the
+	// window/program is closed. Saving is best effort: a failure must not keep the program from
+	// exiting, so it is reported on System.err instead of being dropped silently.
+	private void saveConfig(){
+		try {
+			Configurator.saveProperties(conf);
+		} catch (ExitException e) {
+			System.err.println("could not save configuration: " + e.getMessage());
+		}
+	}
+
+	// Switches proxy usage on (b == true) or off through the system properties "proxyHost" and "proxyPort".
+	// Host and port come from the Configurator; a port that is not a number ends in an ExitException.
+	private void initProxy(boolean b) throws ExitException{
+		if (!b){
+			// empty values switch the proxy off again
+			System.setProperty("proxyHost","");
+			System.setProperty("proxyPort","");
+			return;
+		}
+
+		String proxyHost = Configurator.getProperty("proxyHost","");
+		String rawPort = Configurator.getProperty("proxyPort","");
+
+		int proxyPort;
+		try{
+			proxyPort = Integer.parseInt(rawPort);
+		}
+		catch (NumberFormatException e){
+			throw new ExitException("error: port is not a number");
+		}
+
+		System.setProperty("proxyHost", proxyHost);
+		System.setProperty("proxyPort", String.valueOf(proxyPort));
+	}
+
+	// Installs a default Authenticator answering with "proxyUser"/"proxyPass" from the Configurator
+	// if the property "proxyAuth" is set; otherwise removes the default Authenticator.
+	// May be necessary for proxy usage.
+	private void initAuthenticator(){
+		boolean authWanted = Boolean.parseBoolean(Configurator.getProperty("proxyAuth","false"));
+		if (!authWanted){
+			Authenticator.setDefault(null);
+			return;
+		}
+
+		Authenticator proxyLogin = new Authenticator() {
+			@Override protected PasswordAuthentication getPasswordAuthentication() {
+				// the credentials are read on every request, not once at installation time
+				String login = Configurator.getProperty("proxyUser", "");
+				String secret = Configurator.getProperty("proxyPass", "");
+				return new PasswordAuthentication(login, secret.toCharArray());
+			}
+		};
+		Authenticator.setDefault(proxyLogin);
+	}
+
+	// Loads the download links for organisms indicated in the file "organisms" (as resource stream).
+	// The file consists of "key=value" lines: a line starting with "org" begins a new organism (the value
+	// is its name); lines starting with "genes", "aaseqs", "idmappings", "prots" or "chroms" add a link
+	// to the organism started last. The value is everything after the first '=', so a link may contain
+	// '=' itself. Lines without a value and link lines before the first organism are ignored.
+	// Returns the organisms in file order; the list is empty if the resource is missing or unreadable
+	// (the problem is then appended to the output window).
+	private Vector<Organism> loadOrganisms(){
+		Vector<Organism> loaded = new Vector<Organism>();
+
+		InputStream is = Downloader.class.getResourceAsStream("/organisms");
+		if (is == null){
+			// getResourceAsStream reports a missing resource with null
+			outputWindow.append("error: organism list not found");
+			return loaded;
+		}
+
+		try {
+			BufferedReader orgbf = new BufferedReader(new InputStreamReader(is));
+			try {
+				String line;
+				while ((line = orgbf.readLine()) != null){
+					int eq = line.indexOf('=');
+					// no text/link after the equal sign: the entry simply stays unset
+					if (eq < 0 || eq == line.length() - 1) continue;
+					String value = line.substring(eq + 1);
+
+					if (line.startsWith("org")){
+						loaded.add(new Organism(value));
+						continue;
+					}
+					// a link needs an organism to belong to
+					if (loaded.isEmpty()) continue;
+
+					Organism current = loaded.lastElement();
+					if (line.startsWith("genes")){
+						current.setGenes(value);
+					}
+					else if (line.startsWith("aaseqs")){
+						current.setAaseqs(value);
+					}
+					else if (line.startsWith("idmappings")){
+						current.setIdmappings(value);
+					}
+					else if (line.startsWith("prots")){
+						current.setProts(value);
+					}
+					else if (line.startsWith("chroms")){
+						current.addChroms(value);
+					}
+				}
+			} finally {
+				// closes the underlying resource stream as well
+				orgbf.close();
+			}
+		} catch (IOException e) {
+			outputWindow.append("error: " + e.getLocalizedMessage());
+		}
+		return loaded;
+	}
+
+	// Collects the names of all available organisms, in the order of "orgs".
+	private Vector<String> getOrgList(){
+		Vector<String> names = new Vector<String>();
+		for (int i = 0; i < orgs.size(); i++){
+			Organism current = orgs.get(i);
+			names.add(current.getName());
+		}
+		return names;
+	}
+
+	// An Organism bundles the links of the different data sources of one organism and returns, at once,
+	// those that are available and switched on in the Configurator (depending on user settings).
+	// It's used by the "loadOrganisms" and the "load" functions.
+	private class Organism{
+		private String name;
+		// a link that was never set stays null, which means "not available"
+		private String genes;
+		private String aaseqs;
+		private String idmappings;
+		private String prots;
+		private Vector<String> chroms = new Vector<String>();
+
+		public Organism(String name){
+			this.name = name;
+		}
+
+		// reads a boolean switch from the Configurator; a missing property counts as "true"
+		private boolean wanted(String key){
+			return Boolean.parseBoolean(Configurator.getProperty(key, "true"));
+		}
+
+		// links of all available data types whose "load..." property is set; chromosome links come last
+		public Vector<String> getLinks(){
+			Vector<String> selected = new Vector<String>();
+			if (genes != null && wanted("loadGenes")) selected.add(genes);
+			if (aaseqs != null && wanted("loadAaseqs")) selected.add(aaseqs);
+			if (idmappings != null && wanted("loadIdmappings")) selected.add(idmappings);
+			if (prots != null && wanted("loadProts")) selected.add(prots);
+			if (!chroms.isEmpty() && wanted("loadChroms")) selected.addAll(chroms);
+			return selected;
+		}
+
+		public String getName() { return name; }
+
+		public void setName(String name) { this.name = name; }
+
+		public void setGenes(String genes) { this.genes = genes; }
+
+		public void setAaseqs(String aaseqs) { this.aaseqs = aaseqs; }
+
+		public void setIdmappings(String idmappings) { this.idmappings = idmappings; }
+
+		public void setProts(String prots) { this.prots = prots; }
+
+		// adds one more chromosome link
+		public void addChroms(String chroms) { this.chroms.add(chroms); }
+
+		public boolean isGenesAvailable(){ return genes != null; }
+
+		public boolean isAaseqsAvailable(){ return aaseqs != null; }
+
+		public boolean isIdmappingsAvailable(){ return idmappings != null; }
+
+		public boolean isProtsAvailable(){ return prots != null; }
+
+		public boolean isChromsAvailable(){ return !chroms.isEmpty(); }
+	}
+
+	// Opens a directory-only FileChooser (used by the path selection buttons) starting in "dir".
+	// Returns the chosen directory, or null if the dialog was not confirmed.
+	private File choosePath(File dir){
+		JFileChooser chooser = new JFileChooser();
+		chooser.setCurrentDirectory(dir);
+		chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+
+		boolean approved = chooser.showOpenDialog( null ) == JFileChooser.APPROVE_OPTION;
+		return approved ? chooser.getSelectedFile() : null;
+	}
+
+	// refreshes the output path text field from the Configurator, used after selecting a file/folder
+	private void updateFields(){
+		String outputPath = Configurator.getProperty("outputPath", "");
+		tf_outputPath.setText(outputPath);
+	}
+
+	// Checks what a download needs: a selected organism, at least one ticked data type that is
+	// available for it, and an output path. Every unmet condition is printed to System.out.
+	// Returns true if anything is missing.
+	private boolean isSelectionMissing(){
+		StringBuffer problems = new StringBuffer();
+
+		if (orglist.isSelectionEmpty()){
+			problems.append("no organism selected\n");
+		}
+		else {
+			Organism org = orgs.get(orglist.getSelectedIndex());
+			boolean anyData =
+					(chckbxGeneAnnotations.isSelected() && org.isGenesAvailable())
+					|| (chckbxAminoAcidSequences.isSelected() && org.isAaseqsAvailable())
+					|| (chckbxIdMappings.isSelected() && org.isIdmappingsAvailable())
+					|| (chckbxProteome.isSelected() && org.isProtsAvailable())
+					|| (chckbxGenome.isSelected() && org.isChromsAvailable());
+			if (!anyData){
+				problems.append("no data selected\n");
+			}
+		}
+
+		String outputPath = Configurator.getProperty("outputPath","");
+		if (outputPath.length() == 0){
+			problems.append("no output path indicated\n");
+		}
+
+		// every problem left a line behind, so a non-empty buffer means something is missing
+		boolean selectionMissing = problems.length() > 0;
+		if (selectionMissing){
+			System.out.println(problems);
+		}
+		return selectionMissing;
+	}
+
+	// opens a question dialog, asking to overwrite/skip an existing file
+	public boolean questionDialog(String filename) throws ExitException{
+		Object[] options = {"Overwrite", "Skip", "Cancel"};
+		int n = JOptionPane.showOptionDialog(frame,
+				"File already exists:\n" + filename ,
+				"File already exists",
+				JOptionPane.YES_NO_CANCEL_OPTION,
+				JOptionPane.QUESTION_MESSAGE,
+				null,
+				options,
+				options[2]);
+		if (n == 0) return true;
+		else if (n == 1) return false;
+		else throw new ExitException("downloads canceled");
+	}
+
+	// Extracts a gz file and reports its progress with the progress bar.
+	// The output is written next to the archive, named like the archive without its last extension.
+	// If that output is a tar archive, it is extracted in the end, too.
+	// IO problems are printed, not thrown. ExitException is thrown when the user cancels a dialog
+	// during the tar extraction (a stop request is presumably raised by Configurator.checkBreak()
+	// the same way).
+	private void unpack(File file) throws ExitException{
+		String name = file.getName();
+		// the extension is searched in the file name only, a dot in a folder name must not count
+		int dot = name.lastIndexOf('.');
+		if (dot < 0){
+			System.out.println(" can not extract file:\n  " + name + " has no archive extension");
+			return;
+		}
+		File target = new File(file.getAbsoluteFile().getParentFile(), name.substring(0, dot));
+		DecimalFormat mb = new DecimalFormat("###0.00");
+
+		try {
+			// floating point division, integer division would always print ".00"
+			System.out.println(" extracting " + name + " (" + mb.format(file.length() / 1024.0 / 1024.0) + " MB)");
+			long size = 0;
+			try{
+				size = getGzSize(file);
+				System.out.println(" expected output size: " + mb.format(size / 1024.0 / 1024.0) + " MB");
+			}
+			catch (IOException e) {
+				System.out.println(" couldn't estimate extracted file size:\n" + 
+						" " + e.getMessage() + "");
+			}
+
+			FileInputStream raw = new FileInputStream(file);
+			try {
+				GzipCompressorInputStream gzIn = new GzipCompressorInputStream(new BufferedInputStream(raw));
+				try {
+					FileOutputStream out = new FileOutputStream(target);
+					try{
+						progressBar.setValue(0);
+						progressBar.setMaximum(Integer.MAX_VALUE);
+
+						final byte[] buffer = new byte[1024];
+						double written = 0;
+						int n;
+						while (-1 != (n = gzIn.read(buffer))) {
+							out.write(buffer, 0, n);
+							written += n;
+							// without a size estimate there is nothing to scale against (avoids a division by zero)
+							if (size > 0){
+								progressBar.setValue((int) Math.min(Integer.MAX_VALUE, written / size * Integer.MAX_VALUE));
+							}
+							Configurator.checkBreak();
+						}
+						progressBar.setValue(progressBar.getMaximum());
+					}
+					finally{
+						out.close();
+					}
+				}
+				finally {
+					gzIn.close();
+				}
+			}
+			finally {
+				// also covers the case that the gz stream could not be created at all
+				raw.close();
+			}
+
+			if (target.getName().endsWith(".tar")){
+				untar(target);
+			}
+		} catch (IOException e) {
+			// also covers FileNotFoundException
+			System.out.println(" can not extract file:\n  " + e.getMessage());
+		}
+	}
+
+	// Extracts the original file size of gz packed data from the last four bytes of the file
+	// (see gz file definition for details about that). The four bytes are combined least
+	// significant byte first. Since the field is only four bytes wide, it cannot hold sizes of
+	// 4 GB or more; the value is only used as an estimate.
+	// Throws IOException if the file cannot be read or is shorter than four bytes.
+	private long getGzSize(File file) throws IOException{
+		RandomAccessFile raf = new RandomAccessFile(file, "r");
+		try {
+			if (raf.length() < 4){
+				// seeking to a negative position would fail with a less helpful message
+				throw new IOException("file too short for a gz size field: " + file.getName());
+			}
+			raf.seek(raf.length() - 4);
+			long b4 = raf.read();
+			long b3 = raf.read();
+			long b2 = raf.read();
+			long b1 = raf.read();
+			return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4;
+		}
+		finally {
+			// close the file on the error paths as well
+			raf.close();
+		}
+	}
+
+	// Extracts files from a tar archive into a folder named after the archive (archive name up to ".tar"),
+	// located next to the archive. Reports the progress to the progress bar (extracted entry sizes in
+	// relation to the archive size).
+	// If the folder already exists, the user is asked to overwrite/skip/cancel.
+	// Entries that would end up outside of the target folder (e.g. names containing "../") are skipped,
+	// folder entries and missing parent folders are created.
+	// IO problems are printed, not thrown. ExitException is thrown when the user cancels the dialog
+	// (a stop request is presumably raised by Configurator.checkBreak() the same way).
+	private void untar(File tarfile) throws ExitException{
+		long size = tarfile.length();
+		// floating point division, integer division would always print ".00"
+		System.out.println(" extracting " + tarfile.getName() + " (" + (new DecimalFormat("###0.00")).format(size / 1024.0 / 1024.0) + " MB)");
+
+		String tarName = tarfile.getName();
+		File dir = new File(tarfile.getAbsoluteFile().getParentFile(), tarName.substring(0, tarName.lastIndexOf(".tar")));
+
+		boolean write = true;
+		if (dir.exists()) write = questionDialog(dir.getName());
+		else if (!dir.mkdirs()){
+			System.out.println(" can not extract file:\n  could not create folder " + dir.getAbsolutePath());
+			return;
+		}
+		if (!write) return;
+
+		try {
+			String root = dir.getCanonicalPath();
+			InputStream is = new FileInputStream(tarfile);
+			try {
+				TarArchiveInputStream in = (TarArchiveInputStream) 
+						new ArchiveStreamFactory().createArchiveInputStream("tar", is); 
+
+				progressBar.setValue(0);
+				progressBar.setMaximum(Integer.MAX_VALUE);
+				double extracted = 0;
+				TarArchiveEntry entry;
+				while ((entry = in.getNextTarEntry()) != null){
+					File target = new File(dir, entry.getName());
+					String canonical = target.getCanonicalPath();
+
+					if (!canonical.equals(root) && !canonical.startsWith(root + File.separator)){
+						// entry names come from the downloaded archive and must not leave the target folder
+						System.out.println(" skipped entry outside of target folder: " + entry.getName());
+					}
+					else if (entry.isDirectory()){
+						target.mkdirs();
+					}
+					else {
+						File parent = target.getParentFile();
+						if (parent != null) parent.mkdirs();
+						OutputStream out = new FileOutputStream(target); 
+						try {
+							IOUtils.copy(in, out);
+						}
+						finally {
+							out.close();
+						}
+					}
+
+					extracted += entry.getSize();
+					if (size > 0){
+						progressBar.setValue((int) Math.min(Integer.MAX_VALUE, extracted / size * Integer.MAX_VALUE));
+					}
+					Configurator.checkBreak();
+				}
+				progressBar.setValue(progressBar.getMaximum());
+			}
+			finally {
+				// closes the archive file even if the tar stream could not be created
+				is.close();
+			}
+		} catch (ArchiveException e) {
+			System.out.println(" can not extract file:\n  " + e.getMessage());
+		} catch (IOException e) {
+			// also covers FileNotFoundException
+			System.out.println(" can not extract file:\n  " + e.getMessage());
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/gui/GeneControlGui.java b/src/de/rki/ng4/ipig/gui/GeneControlGui.java
new file mode 100644
index 0000000..ddf0018
--- /dev/null
+++ b/src/de/rki/ng4/ipig/gui/GeneControlGui.java
@@ -0,0 +1,542 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.gui;
+
+import java.awt.Color;
+import java.awt.EventQueue;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+import javax.swing.JButton;
+import javax.swing.JFileChooser;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JSpinner;
+import javax.swing.JTextField;
+import javax.swing.SpinnerNumberModel;
+import javax.swing.UIManager;
+import javax.swing.border.TitledBorder;
+import javax.swing.event.ChangeEvent;
+import javax.swing.event.ChangeListener;
+import javax.swing.filechooser.FileNameExtensionFilter;
+
+import de.rki.ng4.ipig.GeneControl;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.Ipig;
+import de.rki.ng4.ipig.tools.Configurator;
+import java.awt.Toolkit;
+
+/**
+ * <p>This is a GUI class for GeneControl. It offers fields and buttons for necessary 
+ * file indications and options.</p> 
+ * 
+ * <p>The start button will create a new thread which runs the GeneControl main routine 
+ * while the GeneControl output is redirected to a text area.</p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class GeneControlGui {
+
+	private JFrame frmGeneControlGui;
+	private JTextField tf_geneAnnoFile;
+	private JButton btn_geneAnnoFile;
+	private JLabel lbl_geneAnnoFile;
+	private JTextField tf_geneAaSeqFile;
+	private JTextField tf_refSeqPath;
+	private JTextField tf_outputPath;
+	private OutputJTextArea outputWindow;
+	private JPanel panel_geneAnnoFile;
+	private JPanel resources;
+	private JPanel panel_geneAaSeqFile;
+	private JButton btn_geneAaSeqFile;
+	private JLabel lbl_geneAaSeqFile;
+	private JPanel panel_refSeqPath;
+	private JButton btn_refSeqPath;
+	private JLabel lbl_refSeqPath;
+	private JPanel panel_outputPath;
+	private JPanel output;
+	private JButton btn_outputPath;
+	private JLabel lbl_outputPath;
+	private JPanel execution;
+	private JPanel buttonPanel;
+	private JButton startButton;
+	private JButton stopButton;
+	private JButton exitButton;
+	private JScrollPane scrollPane;
+	private Thread control;
+	private JButton helpButton;
+	private JPanel config;
+	private JPanel configButtons;
+	private JButton loadButton;
+	private JButton saveButton;
+	private JPanel menu;
+	private JPanel menuPanel;
+	private JPanel options;
+	private JPanel panel_similarity;
+	private JLabel lblMinimalSimilarity;
+	private JSpinner spinner;
+
+	/**
+	 * Launch the application: the window is created and shown by a task
+	 * handed to {@code EventQueue.invokeLater}.
+	 */
+	public static void main(String[] args) {
+		Runnable launcher = new Runnable() {
+			public void run() {
+				try {
+					GeneControlGui gui = new GeneControlGui();
+					gui.frmGeneControlGui.setVisible(true);
+				} catch (Exception e) {
+					e.printStackTrace();
+				}
+			}
+		};
+		EventQueue.invokeLater(launcher);
+	}
+
+	/**
+	 * Create the application: builds the frame with all its components and then
+	 * fills the text fields with the values currently held by the Configurator.
+	 */
+	public GeneControlGui() {
+		initialize();
+		updateFields();
+	}
+
+	/**
+	 * Initialize the contents of the frame.
+	 *
+	 * <p>Builds, from top to bottom, the panels "Resources" (three file/path choosers),
+	 * "Output" (output path), "Options" (minimal similarity), "Execution" (start/stop),
+	 * "Configuration" (load/save), "Menu" (help/exit) and the scrollable output window.
+	 * All components are placed with fixed bounds. The listeners of the chooser buttons
+	 * and of the spinner write the chosen values into the Configurator.</p>
+	 */
+	private void initialize() {
+		// try the native look and feel; on failure the error is only printed and the setup continues
+		try {
+			UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+		} catch(Exception e) {
+			System.out.println("Error setting native LAF: " + e);
+		}
+
+		frmGeneControlGui = new JFrame();
+		frmGeneControlGui.setIconImage(Toolkit.getDefaultToolkit().getImage(GeneControlGui.class.getResource("/images/rki.png")));
+		frmGeneControlGui.setResizable(false);
+		frmGeneControlGui.setTitle("GeneControl GUI");
+		frmGeneControlGui.setBounds(100, 100, 500, 600);
+		frmGeneControlGui.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+		frmGeneControlGui.getContentPane().setLayout(null); // no layout manager: every component is positioned via setBounds
+
+		resources = new JPanel();
+		resources.setBorder(new TitledBorder(null, "Resources", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		resources.setBounds(10, 11, 472, 152);
+		resources.setLayout(null);
+		frmGeneControlGui.getContentPane().add(resources);
+
+		panel_geneAnnoFile = new JPanel();
+		panel_geneAnnoFile.setLayout(null);
+		panel_geneAnnoFile.setBounds(10, 14, 455, 36);
+		resources.add(panel_geneAnnoFile);
+
+		tf_geneAnnoFile = new JTextField();
+		tf_geneAnnoFile.setEditable(false);
+		tf_geneAnnoFile.setColumns(10);
+		tf_geneAnnoFile.setBounds(0, 14, 356, 20);
+		panel_geneAnnoFile.add(tf_geneAnnoFile);
+
+		btn_geneAnnoFile = new JButton("Open");
+		btn_geneAnnoFile.setBounds(366, 13, 89, 23);
+		panel_geneAnnoFile.add(btn_geneAnnoFile);
+		btn_geneAnnoFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_geneAnnoFile.getText()), new FileNameExtensionFilter("Text file", "txt"));
+				if (file != null){ // null: no file was chosen, keep the current setting
+					Configurator.setProperty("FgeneAnnoFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+
+		lbl_geneAnnoFile = new JLabel("UCSC/Ensembl Genes table file:");
+		lbl_geneAnnoFile.setBounds(0, 0, 374, 14);
+		panel_geneAnnoFile.add(lbl_geneAnnoFile);
+
+		panel_geneAaSeqFile = new JPanel();
+		panel_geneAaSeqFile.setLayout(null);
+		panel_geneAaSeqFile.setBounds(10, 59, 455, 36);
+		resources.add(panel_geneAaSeqFile);
+
+		tf_geneAaSeqFile = new JTextField();
+		tf_geneAaSeqFile.setText("");
+		tf_geneAaSeqFile.setEditable(false);
+		tf_geneAaSeqFile.setColumns(10);
+		tf_geneAaSeqFile.setBounds(0, 14, 356, 20);
+		panel_geneAaSeqFile.add(tf_geneAaSeqFile);
+
+		btn_geneAaSeqFile = new JButton("Open");
+		btn_geneAaSeqFile.setBounds(366, 13, 89, 23);
+		btn_geneAaSeqFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_geneAaSeqFile.getText()), new FileNameExtensionFilter("Text file", "txt"));
+				if (file != null){
+					Configurator.setProperty("FgeneAaSeqFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		panel_geneAaSeqFile.add(btn_geneAaSeqFile);
+
+		lbl_geneAaSeqFile = new JLabel("UCSC/Ensembl Amino Acid Sequences table file:");
+		lbl_geneAaSeqFile.setBounds(0, 0, 374, 14);
+		panel_geneAaSeqFile.add(lbl_geneAaSeqFile);
+
+		panel_refSeqPath = new JPanel();
+		panel_refSeqPath.setLayout(null);
+		panel_refSeqPath.setBounds(10, 106, 455, 36);
+		resources.add(panel_refSeqPath);
+
+		tf_refSeqPath = new JTextField();
+		tf_refSeqPath.setText("");
+		tf_refSeqPath.setEditable(false);
+		tf_refSeqPath.setColumns(10);
+		tf_refSeqPath.setBounds(0, 14, 356, 20);
+		panel_refSeqPath.add(tf_refSeqPath);
+
+		btn_refSeqPath = new JButton("Open");
+		btn_refSeqPath.setBounds(366, 13, 89, 23);
+		btn_refSeqPath.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = choosePath(new File(tf_refSeqPath.getText()));
+				if (file != null){
+					Configurator.setProperty("refSeqPath",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		panel_refSeqPath.add(btn_refSeqPath);
+
+		lbl_refSeqPath = new JLabel("Reference chromosomes path:");
+		lbl_refSeqPath.setBounds(0, 0, 374, 14);
+		panel_refSeqPath.add(lbl_refSeqPath);
+
+		output = new JPanel();
+		output.setBorder(new TitledBorder(null, "Output", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		output.setLayout(null);
+		output.setBounds(10, 174, 472, 59);
+		frmGeneControlGui.getContentPane().add(output);
+
+		panel_outputPath = new JPanel();
+		panel_outputPath.setLayout(null);
+		panel_outputPath.setBounds(10, 14, 455, 36);
+		output.add(panel_outputPath);
+
+		tf_outputPath = new JTextField();
+		tf_outputPath.setText("");
+		tf_outputPath.setEditable(false);
+		tf_outputPath.setColumns(10);
+		tf_outputPath.setBounds(0, 14, 356, 20);
+		panel_outputPath.add(tf_outputPath);
+
+		btn_outputPath = new JButton("Open");
+		btn_outputPath.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = choosePath(new File(tf_outputPath.getText()));
+				if (file != null){
+					Configurator.setProperty("FoutputPath",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		btn_outputPath.setBounds(366, 13, 89, 23);
+		panel_outputPath.add(btn_outputPath);
+
+		lbl_outputPath = new JLabel("Output Path (optional):");
+		lbl_outputPath.setBounds(0, 0, 374, 14);
+		panel_outputPath.add(lbl_outputPath);
+
+		options = new JPanel();
+		options.setBorder(new TitledBorder(null, "Options", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		options.setBounds(10, 244, 474, 44);
+		frmGeneControlGui.getContentPane().add(options);
+		options.setLayout(null);
+
+		panel_similarity = new JPanel();
+		panel_similarity.setOpaque(false);
+		panel_similarity.setBounds(162, 14, 150, 20);
+		options.add(panel_similarity);
+		panel_similarity.setLayout(null);
+
+		lblMinimalSimilarity = new JLabel("Minimal similarity:");
+		lblMinimalSimilarity.setBounds(0, 3, 82, 14);
+		panel_similarity.add(lblMinimalSimilarity);
+
+		spinner = new JSpinner();
+		spinner.setModel(new SpinnerNumberModel(0.95, 0.01, 1.0, 0.01)); // initial value 0.95, range 0.01 to 1.0, step 0.01
+		spinner.setBounds(92, 0, 58, 20);
+		spinner.addChangeListener(new ChangeListener() {
+			public void stateChanged(ChangeEvent arg0) {
+				Configurator.setProperty("minSimilarity", spinner.getValue().toString());
+			}
+		});
+		panel_similarity.add(spinner);
+
+		execution = new JPanel();
+		execution.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Execution", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		execution.setLayout(null);
+		execution.setBounds(10, 299, 147, 44);
+		frmGeneControlGui.getContentPane().add(execution);
+
+		buttonPanel = new JPanel();
+		buttonPanel.setOpaque(false);
+		buttonPanel.setLayout(null);
+		buttonPanel.setBounds(6, 14, 135, 23);
+		execution.add(buttonPanel);
+
+		// the start button creates and starts a new control thread on every click;
+		// the parameters are then checked in GeneControl
+		startButton = new JButton("Start");
+		startButton.setBounds(0, 0, 65, 23);
+		startButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				control = new Control();
+				control.start();
+			}
+		});
+		buttonPanel.add(startButton);
+
+		// the stop button sets a break flag in the Configurator, which is checked by GeneControl periodically.
+		// if the control thread is already dead (e.g. crashed), the button restores the interface by enabling the other buttons itself.
+		stopButton = new JButton("Stop");
+		stopButton.setEnabled(false);
+		stopButton.setBounds(70, 0, 65, 23);
+		stopButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				if (control != null && control.isAlive()){
+					Configurator.setBreak(true);
+					stopButton.setEnabled(false); // one stop request is enough
+				}
+				else{
+					enableButtons(true);
+					Configurator.setBreak(false);
+					outputWindow.redirectSystemStreams(false, false);
+				}	
+			}
+		});
+		buttonPanel.add(stopButton);
+
+		config = new JPanel();
+		config.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Configuration", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		config.setLayout(null);
+		config.setBounds(173, 299, 147, 44);
+		frmGeneControlGui.getContentPane().add(config);
+
+		configButtons = new JPanel();
+		configButtons.setLayout(null);
+		configButtons.setOpaque(false);
+		configButtons.setBounds(6, 14, 135, 23);
+		config.add(configButtons);
+
+		// config load button: loads a chosen configuration file into the Configurator and refreshes the fields
+		loadButton = new JButton("Load");
+		loadButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(""), new FileNameExtensionFilter("configuration file", "conf", "txt"));
+				if (file != null){
+					try {
+						Configurator.loadProperties(file.getAbsolutePath());
+						updateFields();
+						outputWindow.setText("configuration loaded");
+					} catch (ExitException e) {
+						outputWindow.setText("error: couldn't load config (" + e.getMessage() + ")");
+					}
+				}
+			}
+		});
+		loadButton.setBounds(0, 0, 65, 23);
+		configButtons.add(loadButton);
+
+		// config save button: writes the current Configurator properties to a chosen file
+		saveButton = new JButton("Save");
+		saveButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = saveFile(new File(""), new FileNameExtensionFilter("configuration file", "conf", "txt"));
+				if (file != null){
+					try {
+						Configurator.saveProperties(file.getAbsolutePath());
+						outputWindow.setText("configuration saved");
+					} catch (ExitException e) {
+						outputWindow.setText("error: couldn't save config (" + e.getMessage() + ")");
+					}
+				}
+			}
+		});
+		saveButton.setBounds(70, 0, 65, 23);
+		configButtons.add(saveButton);
+
+		menu = new JPanel();
+		menu.setBorder(new TitledBorder(null, "Menu", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		menu.setLayout(null);
+		menu.setBounds(335, 299, 147, 44);
+		frmGeneControlGui.getContentPane().add(menu);
+
+		menuPanel = new JPanel();
+		menuPanel.setLayout(null);
+		menuPanel.setOpaque(false);
+		menuPanel.setBounds(6, 14, 135, 23);
+		menu.add(menuPanel);
+
+		// help button, loads a help text from the resources into the output window (replacing its content)
+		helpButton = new JButton("Help");
+		helpButton.setBounds(0, 0, 65, 23);
+		menuPanel.add(helpButton);
+		helpButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				try {
+					String help = "/help/genecontrolgui_help.txt";
+					InputStream is = Ipig.class.getResourceAsStream(help); // NOTE(review): null if the resource is missing, which is not handled below
+					BufferedReader helpbf = new BufferedReader(new InputStreamReader(is));
+					outputWindow.setText("");
+					while (helpbf.ready()){ // NOTE(review): ready() tells whether a read would block, not whether the end is reached - confirm the text is always read completely
+						outputWindow.append(helpbf.readLine() + "\n");
+					}
+					outputWindow.setCaretPosition(0); 
+					is.close();
+				} catch (IOException e) {
+					outputWindow.append("error: " + e.getLocalizedMessage());
+				}
+			}
+		});
+
+		// exit button: ends the program right away via System.exit(0)
+		exitButton = new JButton("Exit");
+		exitButton.setBounds(70, 0, 65, 23);
+		menuPanel.add(exitButton);
+		exitButton.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				System.exit(0);
+			}
+		});
+
+		// the output window displays the progress or errors.
+		// OutputJTextArea is an extended JTextArea which catches and displays the System.out stream,
+		// and so, finally, the output of GeneControl.
+		outputWindow = new OutputJTextArea();
+		outputWindow.setEditable(false);
+		outputWindow.setSize(473, 162);
+		outputWindow.setLocation(10, 400);
+		outputWindow.setLineWrap(true);
+
+		scrollPane = new JScrollPane(outputWindow);
+		scrollPane.setBounds(10, 354, 473, 203);
+		frmGeneControlGui.getContentPane().add(scrollPane);
+	}
+
+	// Worker thread that runs GeneControl in the background and manages the output window and the buttons meanwhile
+	class Control extends Thread{
+		@Override public void run(){
+			Configurator.setBreak(false); // reset the break flag before the run starts
+			enableButtons(false); // disables the buttons, only the stop button gets enabled
+			outputWindow.setText("");
+			outputWindow.redirectSystemStreams(true, true); // System.out and System.err now end up in the output window
+
+			try {
+				new GeneControl().run();
+			} catch (ExitException e) {
+				System.out.println("\n" + e.getMessage()); // printed while still redirected, i.e. into the output window
+			}
+			// clean-up; it is not in a finally block, so any exception other than ExitException skips it
+			enableButtons(true);
+			Configurator.setBreak(false);
+			outputWindow.redirectSystemStreams(false, false); // switch back to the streams saved by the output window
+		}
+	}
+
+	// Switches all buttons around a control run: b == true enables them, b == false disables them.
+	// The stop button is the exception and always receives the opposite state.
+	private void enableButtons(boolean b){
+		stopButton.setEnabled(!b); // not b!
+		startButton.setEnabled(b);
+		btn_geneAnnoFile.setEnabled(b);
+		btn_geneAaSeqFile.setEnabled(b);
+		btn_refSeqPath.setEnabled(b);
+		btn_outputPath.setEnabled(b);
+		loadButton.setEnabled(b);
+		saveButton.setEnabled(b);
+		helpButton.setEnabled(b);
+		exitButton.setEnabled(b);
+	}
+
+	// updates the filename and parameter fields after selecting a file/folder or loading a configuration file
+	private void updateFields(){
+		tf_geneAnnoFile.setText(Configurator.getProperty("FgeneAnnoFile", ""));
+		tf_geneAaSeqFile.setText(Configurator.getProperty("FgeneAaSeqFile", ""));
+		tf_refSeqPath.setText(Configurator.getProperty("refSeqPath", ""));
+		tf_outputPath.setText(Configurator.getProperty("FoutputPath", ""));
+
+		try{
+			spinner.setValue(Double.parseDouble(Configurator.getProperty("minSimilarity", "0.95")));
+		}
+		catch (NumberFormatException e){
+			outputWindow.setText("error: couldn't parse \"minSimilarity\" from config file \n" +
+					"(" + e.getMessage() + ") - set parameter manually!");
+		}
+	}
+
+	// creation of a FileChooser for the file selection/open buttons
+	private File chooseFile(File dir, FileNameExtensionFilter filter){
+		JFileChooser fc = new JFileChooser();
+		fc.setFileFilter(filter);
+		fc.setCurrentDirectory(dir);
+
+		int state = fc.showOpenDialog( null );
+
+		if ( state == JFileChooser.APPROVE_OPTION )
+		{
+			return fc.getSelectedFile();
+		}
+		else
+			return null;
+	}
+
+	// creation of a FileChooser for the config save button
+	private File saveFile(File dir, FileNameExtensionFilter filter){
+		JFileChooser fc = new JFileChooser();
+		fc.setFileFilter(filter);
+		fc.setCurrentDirectory(dir);
+		fc.setSelectedFile( new File("mapper.conf") );
+
+		while ( fc.showSaveDialog( null ) == JFileChooser.APPROVE_OPTION ){
+			File file = fc.getSelectedFile();
+			if (file.exists())  {  
+				int answer = JOptionPane.showConfirmDialog(null, "Replace existing file?");  
+				if (answer == JOptionPane.OK_OPTION)  
+					return fc.getSelectedFile();
+				if (answer == JOptionPane.CANCEL_OPTION)
+					break;
+			} 
+			else
+				return fc.getSelectedFile();
+		}
+		return null;
+	}
+
+	// creation of a FileChooser for the path selection buttons
+	private File choosePath(File dir){
+		JFileChooser fc = new JFileChooser();
+		fc.setCurrentDirectory(dir);
+		fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+
+		int state = fc.showOpenDialog( null );
+
+		if ( state == JFileChooser.APPROVE_OPTION )
+		{
+			return fc.getSelectedFile();
+		}
+		else
+			return null;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/gui/IpigGui.java b/src/de/rki/ng4/ipig/gui/IpigGui.java
new file mode 100644
index 0000000..2593d8c
--- /dev/null
+++ b/src/de/rki/ng4/ipig/gui/IpigGui.java
@@ -0,0 +1,762 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.gui;
+
+import java.awt.Color;
+import java.awt.EventQueue;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.awt.event.FocusAdapter;
+import java.awt.event.FocusEvent;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+import javax.swing.JButton;
+import javax.swing.JFileChooser;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
+import javax.swing.JSpinner;
+import javax.swing.JTextField;
+import javax.swing.SwingConstants;
+import javax.swing.UIManager;
+import javax.swing.border.TitledBorder;
+import javax.swing.event.ChangeEvent;
+import javax.swing.event.ChangeListener;
+import javax.swing.filechooser.FileNameExtensionFilter;
+
+import de.rki.ng4.ipig.Ipig;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.tools.Configurator;
+import java.awt.Toolkit;
+
+
+/**
+ * <p>This is a GUI class for iPiG. It offers fields and buttons for necessary file indications
+ *  and options.</p> 
+ * 
+ * <p>The start button will create a new thread which runs the iPiG main routine 
+ * while the iPiG output is redirected to a text area.</p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class IpigGui {
+	private JFrame frmipigGui;
+	private JTextField tf_geneAnnoFile;
+	private JButton btn_geneAnnoFile;
+	private JLabel lbl_geneAnnoFile;
+	private JTextField tf_geneAaSeqFile;
+	private JButton btn_geneAaSeqFile;
+	private JLabel lbl_geneAaSeqFile;
+	private JTextField tf_protMapFile;
+	private JButton btn_protMapFile;
+	private JLabel lbl_protMapFile;
+	private JTextField tf_protSeqFile;
+	private JButton btn_protSeqFile;
+	private JLabel lbl_protSeqFile;
+	private JButton btn_start;
+	private OutputJTextArea outputWindow;
+	private Thread mapper;
+	private JButton btn_stop;
+	private JTextField tf_msPepFile;
+	private JButton btn_msPepFile;
+	private JLabel lbl_msPepFile;
+	private JPanel input;
+	private JPanel resources;
+	private JPanel settings;
+	private JPanel panel_button;
+	private JPanel panel_geneAnnoFile;
+	private JPanel panel_geneAaSeqFile;
+	private JPanel panel_protMapFile;
+	private JPanel panel_protSeqFile;
+	private JPanel panel_msPepFile;
+	private JLabel lblScoreRange;
+	private JSpinner sp_minScore;
+	private JLabel lblTo;
+	private JSpinner sp_maxScore;
+	private JLabel lblThresholds;
+	private JSpinner sp_threshold1;
+	private JLabel lblAnd;
+	private JSpinner sp_threshold2;
+	private JPanel panel_11;
+	private JPanel panel_9;
+	private JLabel lblColors;
+	private JTextField color1;
+	private JTextField color2;
+	private JTextField color3;
+	private JPanel panel_10;
+	private JPanel panel_12;
+	private JPanel execution;
+	private JPanel config;
+	private JPanel panel_1;
+	private JButton btn_load;
+	private JButton btn_save;
+	private JPanel menu;
+	private JPanel panel_3;
+	private JButton btn_help;
+	private JButton btn_exit;
+	private JPanel panel_outputPath;
+	private JTextField tf_outputPath;
+	private JButton btn_outputPath;
+	private JLabel lbl_outputPath;
+
+	/**
+	 * Launches the application: the GUI is built and shown on the event dispatch thread; args is not used.
+	 */
+	public static void main(String[] args) {
+		final Runnable launcher = new Runnable() {
+			public void run() {
+				try {
+					new IpigGui().frmipigGui.setVisible(true);
+				} catch (Exception startupFailure) {
+					startupFailure.printStackTrace();
+				}
+			}
+		};
+		EventQueue.invokeLater(launcher);
+	}
+
+	/**
+	 * Creates the application window: builds all widgets first, then fills them from the Configurator.
+	 */
+	public IpigGui() {
+		initialize();
+		updateFields(); // needs the fields created by initialize(), so it has to run second
+	}
+
+	/**
+	 * Builds the frame with all panels, fields and buttons (absolute positioning) and registers their listeners.
+	 */
+	private void initialize() {
+		// try native look and feel
+		try {
+			UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+		} catch(Exception e) {
+			System.out.println("Error setting native LAF: " + e);
+		}
+
+		frmipigGui = new JFrame();
+		frmipigGui.setIconImage(Toolkit.getDefaultToolkit().getImage(IpigGui.class.getResource("/images/rki.png")));
+		frmipigGui.setResizable(false);
+		frmipigGui.setTitle("iPiG GUI");
+		frmipigGui.setBounds(100, 100, 500, 600);
+		frmipigGui.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+		frmipigGui.getContentPane().setLayout(null); // no layout manager: every component below gets explicit bounds
+
+		input = new JPanel();
+		input.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Input", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		input.setBounds(10, 10, 473, 100);
+		frmipigGui.getContentPane().add(input);
+		input.setLayout(null);
+
+		panel_msPepFile = new JPanel();
+		panel_msPepFile.setBounds(10, 14, 455, 36);
+		input.add(panel_msPepFile);
+		panel_msPepFile.setLayout(null);
+
+		tf_msPepFile = new JTextField();
+		tf_msPepFile.setBounds(0, 14, 356, 20);
+		panel_msPepFile.add(tf_msPepFile);
+		tf_msPepFile.setEditable(false);
+		tf_msPepFile.setText("");
+		tf_msPepFile.setColumns(10);
+
+		btn_msPepFile = new JButton("Open");
+		btn_msPepFile.setBounds(366, 13, 89, 23);
+		btn_msPepFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_msPepFile.getText()), new FileNameExtensionFilter("Peptides (txt, mzid)", "txt", "mzid"));
+				if (file != null){
+					Configurator.setSysProperty("msPepFile", file.getAbsolutePath());
+					Configurator.setSysProperty("msPepSetName", file.getName().replaceFirst("\\.[^.]*$", "")); // file name without its last extension; names without a dot are kept unchanged
+					updateFields();
+				}
+			}
+		});
+		panel_msPepFile.add(btn_msPepFile);
+
+		lbl_msPepFile = new JLabel("Peptide Spectrum Matches:");
+		lbl_msPepFile.setBounds(0, 0, 374, 14);
+		panel_msPepFile.add(lbl_msPepFile);
+
+		panel_outputPath = new JPanel();
+		panel_outputPath.setLayout(null);
+		panel_outputPath.setBounds(10, 54, 455, 36);
+		input.add(panel_outputPath);
+
+		tf_outputPath = new JTextField();
+		tf_outputPath.setText("");
+		tf_outputPath.setEditable(false);
+		tf_outputPath.setColumns(10);
+		tf_outputPath.setBounds(0, 14, 356, 20);
+		panel_outputPath.add(tf_outputPath);
+
+		btn_outputPath = new JButton("Open");
+		btn_outputPath.setBounds(366, 13, 89, 23);
+		btn_outputPath.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = choosePath(new File(tf_outputPath.getText()));
+				if (file != null){
+					Configurator.setProperty("outputPath",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		panel_outputPath.add(btn_outputPath);
+
+		lbl_outputPath = new JLabel("Output Path (optional):");
+		lbl_outputPath.setBounds(0, 0, 374, 14);
+
+		panel_outputPath.add(lbl_outputPath);
+
+		resources = new JPanel();
+		resources.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Resources", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		resources.setBounds(10, 110, 473, 197);
+		frmipigGui.getContentPane().add(resources);
+		resources.setLayout(null);
+
+		panel_geneAnnoFile = new JPanel();
+		panel_geneAnnoFile.setBounds(10, 14, 455, 36);
+		resources.add(panel_geneAnnoFile);
+		panel_geneAnnoFile.setLayout(null);
+
+		tf_geneAnnoFile = new JTextField();
+		tf_geneAnnoFile.setBounds(0, 14, 356, 20);
+		panel_geneAnnoFile.add(tf_geneAnnoFile);
+		tf_geneAnnoFile.setEditable(false);
+		tf_geneAnnoFile.setColumns(10);
+
+		btn_geneAnnoFile = new JButton("Open");
+		btn_geneAnnoFile.setBounds(366, 13, 89, 23);
+		panel_geneAnnoFile.add(btn_geneAnnoFile);
+
+		lbl_geneAnnoFile = new JLabel("UCSC/Ensembl Genes table file:");
+		lbl_geneAnnoFile.setBounds(0, 0, 374, 14);
+		panel_geneAnnoFile.add(lbl_geneAnnoFile);
+		btn_geneAnnoFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_geneAnnoFile.getText()), new FileNameExtensionFilter("Text file", "txt"));
+				if (file != null){
+					Configurator.setProperty("geneAnnoFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+
+		panel_geneAaSeqFile = new JPanel();
+		panel_geneAaSeqFile.setBounds(10, 59, 455, 36);
+		resources.add(panel_geneAaSeqFile);
+		panel_geneAaSeqFile.setLayout(null);
+
+		tf_geneAaSeqFile = new JTextField();
+		tf_geneAaSeqFile.setBounds(0, 14, 356, 20);
+		panel_geneAaSeqFile.add(tf_geneAaSeqFile);
+		tf_geneAaSeqFile.setEditable(false);
+		tf_geneAaSeqFile.setColumns(10);
+
+		btn_geneAaSeqFile = new JButton("Open");
+		btn_geneAaSeqFile.setBounds(366, 13, 89, 23);
+		btn_geneAaSeqFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_geneAaSeqFile.getText()), new FileNameExtensionFilter("Text file", "txt"));
+				if (file != null){
+					Configurator.setProperty("geneAaSeqFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+		panel_geneAaSeqFile.add(btn_geneAaSeqFile);
+
+		lbl_geneAaSeqFile = new JLabel("UCSC/Ensembl Amino Acid Sequences table file:");
+		lbl_geneAaSeqFile.setBounds(0, 0, 374, 14);
+		panel_geneAaSeqFile.add(lbl_geneAaSeqFile);
+
+		panel_protMapFile = new JPanel();
+		panel_protMapFile.setBounds(10, 106, 455, 36);
+		panel_protMapFile.setLayout(null);
+		resources.add(panel_protMapFile);
+
+		tf_protMapFile = new JTextField();
+		tf_protMapFile.setBounds(0, 14, 356, 20);
+		panel_protMapFile.add(tf_protMapFile);
+		tf_protMapFile.setEditable(false);
+		tf_protMapFile.setColumns(10);
+
+		btn_protMapFile = new JButton("Open");
+		btn_protMapFile.setBounds(366, 13, 89, 23);
+		panel_protMapFile.add(btn_protMapFile);
+
+		lbl_protMapFile = new JLabel("UniProt ID-mapping file (optional but recommended):");
+		lbl_protMapFile.setBounds(0, 0, 374, 14);
+		panel_protMapFile.add(lbl_protMapFile);
+		btn_protMapFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_protMapFile.getText()), new FileNameExtensionFilter("Tab file", "tab" ));
+				if (file != null){
+					Configurator.setProperty("protMapFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+
+		panel_protSeqFile = new JPanel();
+		panel_protSeqFile.setBounds(10, 151, 455, 36);
+		resources.add(panel_protSeqFile);
+		panel_protSeqFile.setLayout(null);
+
+		tf_protSeqFile = new JTextField();
+		tf_protSeqFile.setBounds(0, 14, 356, 20);
+		panel_protSeqFile.add(tf_protSeqFile);
+		tf_protSeqFile.setEditable(false);
+		tf_protSeqFile.setColumns(10);
+
+		btn_protSeqFile = new JButton("Open");
+		btn_protSeqFile.setBounds(366, 13, 89, 23);
+		panel_protSeqFile.add(btn_protSeqFile);
+
+		lbl_protSeqFile = new JLabel("Proteome fasta file (optional):");
+		lbl_protSeqFile.setBounds(0, 0, 374, 14);
+		panel_protSeqFile.add(lbl_protSeqFile);
+
+		btn_protSeqFile.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(tf_protSeqFile.getText()), new FileNameExtensionFilter("Fasta file", "fasta"));
+				if (file != null){
+					Configurator.setProperty("protSeqFile",file.getAbsolutePath());
+					updateFields();
+				}
+			}
+		});
+
+		settings = new JPanel();
+		settings.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Export Settings", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		settings.setBounds(10, 307, 473, 68);
+		frmipigGui.getContentPane().add(settings);
+		settings.setLayout(null);
+
+		panel_12 = new JPanel();
+		panel_12.setBounds(10, 16, 453, 20);
+		settings.add(panel_12);
+		panel_12.setLayout(null);
+
+		panel_11 = new JPanel();
+		panel_11.setBounds(0, 0, 218, 20);
+		panel_12.add(panel_11);
+		panel_11.setLayout(null);
+
+		lblScoreRange = new JLabel("Score range:");
+		lblScoreRange.setBounds(0, 3, 62, 14);
+		panel_11.add(lblScoreRange);
+
+		// every spinner writes its new value straight into the Configurator
+		sp_minScore = new JSpinner();
+		sp_minScore.addChangeListener(new ChangeListener() {
+			public void stateChanged(ChangeEvent arg0) {
+				Configurator.setProperty("minScore", sp_minScore.getValue().toString());
+			}
+		});
+		sp_minScore.setBounds(72, 0, 63, 20);
+		panel_11.add(sp_minScore);
+
+		lblTo = new JLabel("-");
+		lblTo.setBounds(145, 3, 4, 14);
+		panel_11.add(lblTo);
+		lblTo.setHorizontalAlignment(SwingConstants.CENTER);
+
+		sp_maxScore = new JSpinner();
+		sp_maxScore.setBounds(155, 0, 63, 20);
+		sp_maxScore.addChangeListener(new ChangeListener() {
+			public void stateChanged(ChangeEvent arg0) {
+				Configurator.setProperty("maxScore", sp_maxScore.getValue().toString());
+			}
+		});
+		panel_11.add(sp_maxScore);
+
+		panel_9 = new JPanel();
+		panel_9.setBounds(225, 0, 228, 20);
+		panel_12.add(panel_9);
+		panel_9.setLayout(null);
+
+		lblThresholds = new JLabel("Thresholds:");
+		lblThresholds.setBounds(6, 3, 56, 14);
+		panel_9.add(lblThresholds);
+
+		sp_threshold1 = new JSpinner();
+		sp_threshold1.setBounds(72, 0, 63, 20);
+		sp_threshold1.addChangeListener(new ChangeListener() {
+			public void stateChanged(ChangeEvent arg0) {
+				Configurator.setProperty("threshold1", sp_threshold1.getValue().toString());
+			}
+		});
+		panel_9.add(sp_threshold1);
+
+		lblAnd = new JLabel("&");
+		lblAnd.setBounds(145, 3, 7, 14);
+		panel_9.add(lblAnd);
+		lblAnd.setHorizontalAlignment(SwingConstants.CENTER);
+
+		sp_threshold2 = new JSpinner();
+		sp_threshold2.setBounds(165, 0, 63, 20);
+		sp_threshold2.addChangeListener(new ChangeListener() {
+			public void stateChanged(ChangeEvent arg0) {
+				Configurator.setProperty("threshold2", sp_threshold2.getValue().toString());
+			}
+		});
+		panel_9.add(sp_threshold2);
+
+		panel_10 = new JPanel();
+		panel_10.setBounds(74, 40, 324, 20);
+		settings.add(panel_10);
+		panel_10.setLayout(null);
+
+		lblColors = new JLabel("Colors:");
+		lblColors.setBounds(0, 3, 34, 14);
+		panel_10.add(lblColors);
+
+		color1 = new JTextField();
+		color1.addFocusListener(new FocusAdapter() {
+			@Override
+			public void focusLost(FocusEvent arg0) {
+				Configurator.setProperty("color1", color1.getText());
+			}
+		});
+		color1.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				Configurator.setProperty("color1", color1.getText());
+			}
+		});
+		color1.setBounds(46, 0, 86, 20);
+		panel_10.add(color1);
+		color1.setColumns(10);
+
+		color2 = new JTextField();
+		color2.addFocusListener(new FocusAdapter() {
+			@Override
+			public void focusLost(FocusEvent arg0) {
+				Configurator.setProperty("color2", color2.getText());
+			}
+		});
+		color2.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				Configurator.setProperty("color2", color2.getText());
+			}
+		});
+		color2.setBounds(142, 0, 86, 20);
+		panel_10.add(color2);
+		color2.setColumns(10);
+
+		color3 = new JTextField();
+		color3.addFocusListener(new FocusAdapter() {
+			@Override
+			public void focusLost(FocusEvent arg0) {
+				Configurator.setProperty("color3", color3.getText());
+			}
+		});
+		color3.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				Configurator.setProperty("color3", color3.getText());
+			}
+		});
+		color3.setBounds(238, 0, 86, 20);
+		panel_10.add(color3);
+		color3.setColumns(10);
+
+		execution = new JPanel();
+		execution.setBounds(10, 375, 147, 44);
+		frmipigGui.getContentPane().add(execution);
+		execution.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Execution", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		execution.setLayout(null);
+
+		panel_button = new JPanel();
+		panel_button.setBounds(6, 14, 135, 23);
+		execution.add(panel_button);
+		panel_button.setLayout(null);
+
+		// start button creates and starts a mapper thread
+		// parameters are then checked in ipig
+		btn_start = new JButton("Start");
+		btn_start.setBounds(0, 0, 65, 23);
+		panel_button.add(btn_start);
+		btn_start.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				mapper = new Mapper();
+				mapper.start();
+			}
+		});
+
+		// stop button sets a breakpoint, which is checked by the ipig periodically.
+		// if the mapper thread is already dead (e.g. crashed), the button restores the interface by enabling the other buttons itself.
+		btn_stop = new JButton("Stop");
+		btn_stop.setEnabled(false);
+		btn_stop.setBounds(70, 0, 65, 23);
+		panel_button.add(btn_stop);
+		btn_stop.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				if (mapper != null && mapper.isAlive()){
+					Configurator.setBreak(true);
+					btn_stop.setEnabled(false);
+				}
+				else{
+					enableButtons(true);
+					Configurator.setBreak(false);
+					outputWindow.redirectSystemStreams(false, false);
+				}
+			}
+		});
+
+		config = new JPanel();
+		config.setBorder(new TitledBorder(UIManager.getBorder("TitledBorder.border"), "Configuration", TitledBorder.CENTER, TitledBorder.TOP, null, new Color(0, 70, 213)));
+		config.setLayout(null);
+		config.setBounds(173, 375, 147, 44);
+		frmipigGui.getContentPane().add(config);
+
+		panel_1 = new JPanel();
+		panel_1.setLayout(null);
+		panel_1.setOpaque(false);
+		panel_1.setBounds(6, 14, 135, 23);
+		config.add(panel_1);
+
+		// config load button
+		btn_load = new JButton("Load");
+		btn_load.setBounds(0, 0, 65, 23);
+		btn_load.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = chooseFile(new File(""), new FileNameExtensionFilter("configuration file", "conf", "txt"));
+				if (file != null){
+					try {
+						Configurator.loadProperties(file.getAbsolutePath());
+						updateFields();
+						outputWindow.setText("configuration loaded");
+					} catch (ExitException e) {
+						outputWindow.setText("error: couldn't load config (" + e.getMessage() + ")");
+					}
+				}
+			}
+		});
+		panel_1.add(btn_load);
+
+		// config save button
+		btn_save = new JButton("Save");
+		btn_save.setBounds(70, 0, 65, 23);
+		btn_save.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				File file = saveFile(new File(""), new FileNameExtensionFilter("configuration file", "conf", "txt"));
+				if (file != null){
+					try {
+						Configurator.saveProperties(file.getAbsolutePath());
+						outputWindow.setText("configuration saved");
+					} catch (ExitException e) {
+						outputWindow.setText("error: couldn't save config (" + e.getMessage() + ")");
+					}
+				}
+			}
+		});
+		panel_1.add(btn_save);
+
+		menu = new JPanel();
+		menu.setBorder(new TitledBorder(null, "Menu", TitledBorder.CENTER, TitledBorder.TOP, null, null));
+		menu.setLayout(null);
+		menu.setBounds(336, 375, 147, 44);
+		frmipigGui.getContentPane().add(menu);
+
+		panel_3 = new JPanel();
+		panel_3.setLayout(null);
+		panel_3.setOpaque(false);
+		panel_3.setBounds(6, 14, 135, 23);
+		menu.add(panel_3);
+
+		// help button, loads a help text from the resources to the output window
+		btn_help = new JButton("Help");
+		btn_help.setBounds(0, 0, 65, 23);
+		btn_help.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				try {
+					String help = "/help/ipiggui_help.txt";
+					InputStream is = Ipig.class.getResourceAsStream(help);
+					BufferedReader helpbf = new BufferedReader(new InputStreamReader(is));
+					outputWindow.setText("");
+					while (helpbf.ready()){
+						outputWindow.append(helpbf.readLine() + "\n");
+					}
+					outputWindow.setCaretPosition(0); // scroll back to the top of the help text
+					is.close();
+				} catch (IOException e) {
+					outputWindow.append("error: " + e.getLocalizedMessage());
+				}
+			}
+		});
+		panel_3.add(btn_help);
+
+		// exit button
+		btn_exit = new JButton("Exit");
+		btn_exit.setBounds(70, 0, 65, 23);
+		btn_exit.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				System.exit(0);
+			}
+		});
+		panel_3.add(btn_exit);
+
+		// the output window displays the progress or errors.
+		// OutputJTextArea is an extended JTextArea which catches and displays the System.out stream,
+		// so finally the output from the ipig.
+		outputWindow = new OutputJTextArea();
+		outputWindow.setEditable(false);
+		outputWindow.setSize(473, 157);
+		outputWindow.setLocation(10, 400);
+		outputWindow.setLineWrap(true);
+
+		JScrollPane scrollPane = new JScrollPane(outputWindow);
+		scrollPane.setBounds(10, 424, 473, 133);
+		frmipigGui.getContentPane().add(scrollPane);
+	}
+
+	// Worker thread that runs the ipig in the background and manages the output window and the buttons meanwhile
+	class Mapper extends Thread{
+		@Override public void run(){
+			Configurator.setBreak(false); // reset the break flag before the run starts
+			enableButtons(false); // disables the buttons, only the stop button gets enabled
+			outputWindow.setText("");
+			outputWindow.redirectSystemStreams(true, true); // System.out and System.err now end up in the output window
+
+			try {
+				Ipig.run();
+			} catch (ExitException e) {
+				System.out.println("\n" + e.getMessage()); // printed while still redirected, i.e. into the output window
+			}
+			// clean-up; it is not in a finally block, so any exception other than ExitException skips it
+			System.gc(); // presumably meant to release the data held by the finished run - TODO confirm
+			enableButtons(true);
+			Configurator.setBreak(false);
+			outputWindow.redirectSystemStreams(false, false); // switch back to the streams saved by the output window
+		}
+	}
+
+	// Enables (b == true) or disables (b == false) all input controls around a mapper run.
+	// The stop button is always set contradictory, so it is only clickable while the mapper runs.
+	private void enableButtons(boolean b){
+		btn_msPepFile.setEnabled(b);
+		btn_outputPath.setEnabled(b);
+		btn_geneAnnoFile.setEnabled(b);
+		btn_geneAaSeqFile.setEnabled(b);
+		btn_protMapFile.setEnabled(b);
+		btn_protSeqFile.setEnabled(b);
+		btn_start.setEnabled(b);
+		btn_stop.setEnabled(!b); // not b!
+		btn_load.setEnabled(b);
+		btn_save.setEnabled(b);
+		btn_help.setEnabled(b);
+		btn_exit.setEnabled(b);
+		// a lightweight container like the settings panel does not pass its state on to its children: switch them too
+		for (java.awt.Component c : new java.awt.Component[] {settings, sp_minScore, sp_maxScore, sp_threshold1, sp_threshold2, color1, color2, color3}) {
+			c.setEnabled(b);
+		}
+	}
+
+	// updates the filename and parameter fields after selecting a file/folder or loading a configuration file
+	private void updateFields(){
+		tf_geneAnnoFile.setText(Configurator.getProperty("geneAnnoFile", ""));
+		tf_geneAaSeqFile.setText(Configurator.getProperty("geneAaSeqFile", ""));
+		tf_protMapFile.setText(Configurator.getProperty("protMapFile", ""));
+		tf_protSeqFile.setText(Configurator.getProperty("protSeqFile", ""));
+		tf_msPepFile.setText(Configurator.getSysProperty("msPepFile", ""));
+
+		tf_outputPath.setText(Configurator.getProperty("outputPath", ""));
+
+		try{
+			sp_minScore.setValue(Integer.parseInt(Configurator.getProperty("minScore", "0")));
+		}
+		catch (NumberFormatException e){
+			outputWindow.setText("error: couldn't parse \"minScore\" from config file \n" +
+					"(" + e.getMessage() + ") - set parameter manually!");
+		}
+		try{
+			sp_maxScore.setValue(Integer.parseInt(Configurator.getProperty("maxScore", "0")));
+		}
+		catch (NumberFormatException e){
+			outputWindow.setText("error: couldn't parse \"maxScore\" from config file \n" +
+					"(" + e.getMessage() + ") - set parameter manually!");
+		}
+		try{
+			sp_threshold1.setValue(Integer.parseInt(Configurator.getProperty("threshold1", "0")));
+		}
+		catch (NumberFormatException e){
+			outputWindow.setText("error: couldn't parse \"threshold1\" from config file \n" +
+					"(" + e.getMessage() + ") - set parameter manually!");
+		}
+		try{
+			sp_threshold2.setValue(Integer.parseInt(Configurator.getProperty("threshold2", "0")));
+		}
+		catch (NumberFormatException e){
+			outputWindow.setText("error: couldn't parse \"threshold2\" from config file \n" +
+					"(" + e.getMessage() + ") - set parameter manually!");
+		}
+
+		color1.setText(Configurator.getProperty("color1", ""));
+		color2.setText(Configurator.getProperty("color2", ""));
+		color3.setText(Configurator.getProperty("color3", ""));
+	}
+
+	// creation of a FileChooser for the file selection/open buttons
+	private File chooseFile(File dir, FileNameExtensionFilter filter){
+		JFileChooser fc = new JFileChooser();
+		fc.setFileFilter(filter);
+		fc.setCurrentDirectory(dir);
+
+		int state = fc.showOpenDialog( null );
+
+		if ( state == JFileChooser.APPROVE_OPTION )
+		{
+			return fc.getSelectedFile();
+		}
+		else
+			return null;
+	}
+
+	// creation of a FileChooser for the config save button
+	private File saveFile(File dir, FileNameExtensionFilter filter){
+		JFileChooser fc = new JFileChooser();
+		fc.setFileFilter(filter);
+		fc.setCurrentDirectory(dir);
+		fc.setSelectedFile( new File("mapper.conf") );
+
+		while ( fc.showSaveDialog( null ) == JFileChooser.APPROVE_OPTION ){
+			File file = fc.getSelectedFile();
+			if (file.exists())  {  
+				int answer = JOptionPane.showConfirmDialog(null, "Replace existing file?");  
+				if (answer == JOptionPane.OK_OPTION)  
+					return fc.getSelectedFile();
+				if (answer == JOptionPane.CANCEL_OPTION)
+					break;
+			} 
+			else
+				return fc.getSelectedFile();
+		}
+		return null;
+	}
+
+	// creation of a FileChooser for the path selection buttons
+	private File choosePath(File dir){
+		JFileChooser fc = new JFileChooser();
+		fc.setCurrentDirectory(dir);
+		fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+
+		int state = fc.showOpenDialog( null );
+
+		if ( state == JFileChooser.APPROVE_OPTION )
+		{
+			return fc.getSelectedFile();
+		}
+		else
+			return null;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/gui/OutputJTextArea.java b/src/de/rki/ng4/ipig/gui/OutputJTextArea.java
new file mode 100644
index 0000000..bf4b5ec
--- /dev/null
+++ b/src/de/rki/ng4/ipig/gui/OutputJTextArea.java
@@ -0,0 +1,111 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.gui;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+
+import javax.swing.JTextArea;
+import javax.swing.SwingUtilities;
+
+/**
+ * <p>An extension of a JTextArea which is able to redirect (catch and print) the system output stream and system error stream.</p>
+ * 
+ * <p>Source of main idea: <a href="http://unserializableone.blogspot.com/2009/01/redirecting-systemout-and-systemerr-to.html">
+ * http://unserializableone.blogspot.com/2009/01/redirecting-systemout-and-systemerr-to.html</a></p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class OutputJTextArea extends JTextArea {
+
+	private static final long serialVersionUID = 1L;
+	
+	private OutputStream newout;
+	private PrintStream stdout = System.out;
+	private PrintStream stderr = System.err;
+	
+	/**
+	 * Simple constructor which prepares the redirection.
+	 */
+	public OutputJTextArea(){
+		super();
+		init();
+	}
+
+	/**
+	 * This constructor prepares the redirection and initiates the text area with a passed String.
+	 * 
+	 * @param text text to be printed in the text area
+	 */
+	public OutputJTextArea(String text){
+		super(text);
+		init();
+	}
+	
+	/**
+	 * This constructor prepares the redirection and creates the text area in a given size (rows and columns).
+	 * 
+	 * @param rows number of rows
+	 * @param cols number of columns
+	 */
+	public OutputJTextArea(int rows, int cols){
+		super(rows, cols);
+		init();
+	}
+	
+	// initiates and adapts an output stream
+	private void init(){
+		newout = new OutputStream() {
+			@Override
+			public void write(int b) throws IOException {
+				updateTextArea(String.valueOf((char) b));
+			}
+
+			@Override
+			public void write(byte[] b, int off, int len) throws IOException {
+				updateTextArea(new String(b, off, len));
+			}
+
+			@Override
+			public void write(byte[] b) throws IOException {
+				write(b, 0, b.length);
+			}
+		};
+	}
+
+	// used by the output stream to write into the text area
+	private void updateTextArea(final String text) {
+		SwingUtilities.invokeLater(new Runnable() {
+			public void run() {
+				append(text);
+			}
+		});
+	}
+
+	/**
+	 * <p>Controls the redirection of the output stream and error stream.</p>
+	 *  
+	 * @param out set true for redirection into the text area, false to use the standard output stream
+	 * @param err set true for redirection into the text area, false to use the standard error stream
+	 */
+	public void redirectSystemStreams(boolean out, boolean err) {
+		if (out){
+			System.setOut(new PrintStream(newout, true));
+		}
+		else{
+			System.setOut(stdout);
+		}
+		
+		if (err){
+			System.setErr(new PrintStream(newout, true));
+		}
+		else{
+			System.setErr(stderr);
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/gui/ProxyDialog.java b/src/de/rki/ng4/ipig/gui/ProxyDialog.java
new file mode 100644
index 0000000..80eeae8
--- /dev/null
+++ b/src/de/rki/ng4/ipig/gui/ProxyDialog.java
@@ -0,0 +1,159 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.gui;
+
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+import javax.swing.JButton;
+import javax.swing.JCheckBox;
+import javax.swing.JDialog;
+import javax.swing.JLabel;
+import javax.swing.JTextField;
+import javax.swing.SwingConstants;
+import javax.swing.UIManager;
+
+import de.rki.ng4.ipig.tools.Configurator;
+
+import java.awt.event.ItemListener;
+import java.awt.event.ItemEvent;
+
+/**
+ * <p>The ProxyDialog is a modal JDialog offering fields to set proxy settings
+ * (host, port, user, password and whether authentication is used).
+ * The values are written to the Configurator after submission (Ok button);
+ * Cancel closes the dialog without writing anything.</p>
+ *
+ * <p>NOTE(review): the password is edited in a plain JTextField and handed to the
+ * Configurator as a plain String, so it is readable while it is typed.</p>
+ *
+ * @author Mathias Kuhring
+ *
+ */
+@SuppressWarnings("serial")
+public class ProxyDialog extends JDialog {
+
+	private JTextField tfHost;
+	private JTextField tfPort;
+	private JTextField tfUser;
+	private JTextField tfPass;
+	private JCheckBox chckbxUseAuthenthication;
+	
+	private JButton btnSet;
+	private JButton btnAbort;
+
+	/**
+	 * Creates a ProxyDialog and makes it visible.
+	 * Any exception raised while doing so is caught and its stack trace is printed.
+	 */
+	public static void run() {
+		try {
+			ProxyDialog dialog = new ProxyDialog();
+			dialog.setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
+			dialog.setVisible(true);
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Create the dialog: builds all labels, fields and buttons with fixed bounds
+	 * and pre-fills the fields with the proxy properties currently held by the Configurator.
+	 */
+	public ProxyDialog() {
+		// try native look and feel
+		try {
+			UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+		} catch(Exception e) {
+			System.out.println("Error setting native LAF: " + e);
+		}
+
+		setModal(true);
+		setTitle("Proxy Settings");
+		setBounds(100, 100, 350, 203);
+		setDefaultCloseOperation(JDialog.DISPOSE_ON_CLOSE);
+		getContentPane().setLayout(null);
+		setLocationRelativeTo(null);
+
+		JLabel lblHost = new JLabel("Host (ftp):");
+		lblHost.setHorizontalAlignment(SwingConstants.RIGHT);
+		lblHost.setBounds(10, 11, 50, 14);
+		getContentPane().add(lblHost);
+
+		JLabel lblPort = new JLabel("Port:");
+		lblPort.setHorizontalAlignment(SwingConstants.RIGHT);
+		lblPort.setBounds(10, 36, 50, 14);
+		getContentPane().add(lblPort);
+
+		JLabel lblUser = new JLabel("User:");
+		lblUser.setHorizontalAlignment(SwingConstants.RIGHT);
+		lblUser.setBounds(10, 61, 50, 14);
+		getContentPane().add(lblUser);
+
+		JLabel lblPassword = new JLabel("Password:");
+		lblPassword.setHorizontalAlignment(SwingConstants.RIGHT);
+		lblPassword.setBounds(10, 86, 50, 14);
+		getContentPane().add(lblPassword);
+
+		tfHost = new JTextField(Configurator.getProperty("proxyHost", ""));
+		tfHost.setBounds(70, 8, 262, 20);
+		getContentPane().add(tfHost);
+		tfHost.setColumns(10);
+
+		tfPort = new JTextField(Configurator.getProperty("proxyPort", ""));
+		tfPort.setColumns(10);
+		tfPort.setBounds(70, 33, 262, 20);
+		getContentPane().add(tfPort);
+
+		tfUser = new JTextField(Configurator.getProperty("proxyUser", ""));
+		tfUser.setEnabled(false);
+		tfUser.setColumns(10);
+		tfUser.setBounds(70, 58, 262, 20);
+		getContentPane().add(tfUser);
+
+		tfPass = new JTextField(Configurator.getProperty("proxyPass", ""));
+		tfPass.setEnabled(false);
+		tfPass.setBounds(70, 83, 262, 20);
+		getContentPane().add(tfPass);
+
+		// toggling the check box enables/disables the user and password fields
+		chckbxUseAuthenthication = new JCheckBox("Use authentication");
+		chckbxUseAuthenthication.addItemListener(new ItemListener() {
+			public void itemStateChanged(ItemEvent arg0) {
+				enableAuth(chckbxUseAuthenthication.isSelected());
+			}
+		});
+		chckbxUseAuthenthication.setBounds(70, 110, 143, 23);
+		getContentPane().add(chckbxUseAuthenthication);
+
+		// initial state of the authentication fields follows the stored "proxyAuth" property
+		enableAuth(Boolean.parseBoolean(Configurator.getProperty("proxyAuth", "false")));
+		
+		// pressing the Ok button will write the given proxy settings to the Configurator and close the dialog
+		btnSet = new JButton("Ok");
+		btnSet.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				Configurator.setProperty("proxyHost", tfHost.getText());
+				Configurator.setProperty("proxyPort", tfPort.getText());
+				Configurator.setProperty("proxyUser", tfUser.getText());
+				Configurator.setProperty("proxyPass", tfPass.getText());
+				Configurator.setProperty("proxyAuth", Boolean.toString(chckbxUseAuthenthication.isSelected()));
+				dispose();
+			}
+		});
+		btnSet.setBounds(70, 140, 89, 23);
+		getContentPane().add(btnSet);
+
+		// pressing the Cancel button closes the dialog without touching the Configurator
+		btnAbort = new JButton("Cancel");
+		btnAbort.addActionListener(new ActionListener() {
+			public void actionPerformed(ActionEvent arg0) {
+				dispose();
+			}
+		});
+		btnAbort.setBounds(169, 140, 89, 23);
+		getContentPane().add(btnAbort);
+	}
+	
+	// enables/disables authentication fields and keeps the check box in the same state
+	private void enableAuth(boolean b){
+		chckbxUseAuthenthication.setSelected(b);
+		tfUser.setEnabled(b);
+		tfPass.setEnabled(b);
+	}
+
+}
diff --git a/src/de/rki/ng4/ipig/mapping/Mapper.java b/src/de/rki/ng4/ipig/mapping/Mapper.java
new file mode 100644
index 0000000..b77cddc
--- /dev/null
+++ b/src/de/rki/ng4/ipig/mapping/Mapper.java
@@ -0,0 +1,272 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.mapping;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Vector;
+import java.util.Map.Entry;
+
+import de.rki.ng4.ipig.data.Gene;
+import de.rki.ng4.ipig.data.GeneSet;
+import de.rki.ng4.ipig.data.Peptide;
+import de.rki.ng4.ipig.data.PeptideSet;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+
+/**
+ * <p>The Mapper takes a PeptideSet and GeneSet and calculates for each Peptide its occurrences in a linked/referenced Gene
+ * ({@link #annotationMapping(PeptideSet, GeneSet)}) or in all Genes ({@link #alternativeMapping(PeptideSet, GeneSet)}).</p>
+ *  
+ * <p>Both methods use the Wu-Manber algorithm to match the peptides against amino acid sequences. 
+ * Afterwards the cdsMaps are used to map the aa sequence positions to genome positions.</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Mapper {
+
+	public static String ANNOTATION = "annotation";
+	public static String ALTERNATIVE = "alternativ";
+
+	/**
+	 * Given a PeptideSet and a GeneSet, annotationMapping will try to map Peptides to Genes with common UniProtKB-AC, RefSeq or Ensemble_TRS.
+	 * 
+	 * <p>Every peptide that shares a protein with a gene is marked as gene-assigned and gets the gene name added,
+	 * independent of whether its sequence is actually found in the gene.</p>
+	 * 
+	 * @param pepset peptides to be mapped 
+	 * @param geneset genes to be used as mapping targets
+	 * @throws ExitException declared for the calls into the tools package (e.g. the break check)
+	 */
+	public static void annotationMapping(PeptideSet pepset, GeneSet geneset) throws ExitException{
+		Info info = new Info("map to genome (1)");
+
+		// Peps and genes are linked with the common prots (UniProtKB-AC, RefSeq or Ensemble_TRS)
+		HashMap<String, Vector<Integer>> pepmap = pepset.getCommonProts("id");
+		HashMap<String, Vector<Integer>> genemap = geneset.getCommonProts();
+
+		for (Entry<String, Vector<Integer>> entry : pepmap.entrySet()){
+			Vector<Integer> pepidx = entry.getValue();
+			Vector<Integer> geneidx = genemap.get(entry.getKey());
+
+			if (geneidx != null){
+				Vector<String> pepseqs = new Vector<String>();
+				for (int i : pepidx){
+					pepseqs.add(pepset.get(i).getSequence());
+				}
+
+				// for each set of peptides with a common protein, the wu-manber alg. is initiated (SHIFT and HASH tables)
+				// with the peptides sequences as search pattern
+				WuManber wumanber = new WuManber(pepseqs, 20);
+
+				for (int i : geneidx){
+					Gene gene = geneset.get(i);
+
+					for (int p : pepidx){
+						pepset.get(p).setGeneAssigned(true);
+						pepset.get(p).addGene(gene.getName());
+					}
+					
+					// each gene with this common protein is used for a search
+					HashMap<Integer, Vector<Integer>> matches = wumanber.searchIn(gene.getAaSequence());
+
+					if (!matches.isEmpty()){
+						Vector<Integer> cds = getCdsMap(gene);
+
+						// search matches are evaluated; the match key is an index into pepseqs resp. pepidx
+						for (Entry<Integer, Vector<Integer>> match : matches.entrySet()){
+							Peptide pep = pepset.get(pepidx.get(match.getKey()));
+							mapMatchPositions(pep, gene, cds, match.getValue(), ANNOTATION);
+						}
+					}
+					
+					Configurator.checkBreak();
+				}
+			}
+		}
+
+		info.stop(pepset.mapPepCount(ANNOTATION), pepset.size());
+	}
+	
+	/**
+	 * <p>Given a PeptideSet and a GeneSet, alternativeMapping finds all possible matches for the given Peptides in all given Genes.</p>
+	 * 
+	 * <p>This method will only involve peptides which are not mapped yet (e.g. with {@link #annotationMapping(PeptideSet, GeneSet)}).</p>
+	 * 
+	 * @param pepset peptides to be mapped 
+	 * @param geneset genes to be used as mapping targets
+	 * @throws ExitException declared for the calls into the tools package (e.g. the break check)
+	 */
+	public static void alternativeMapping(PeptideSet pepset, GeneSet geneset) throws ExitException{
+		Info info = new Info("map to genome (2)");
+
+		// use only unmapped peptides
+		Vector<Integer> pepidx = pepset.getUnmapped();
+		Vector<String> pepseqs = new Vector<String>();
+		for (int i : pepidx){
+			pepseqs.add(pepset.get(i).getSequence());
+		}
+
+		// initiate wu-manber once with all peptide sequences as search patterns
+		WuManber wm = new WuManber(pepseqs, 20);
+		
+		for (Gene gene : geneset.getAll()){
+			// use each gene for the search
+			HashMap<Integer, Vector<Integer>> matches = wm.searchIn(gene.getAaSequence());
+			
+			if (!matches.isEmpty()){
+				Vector<Integer> cdsMap = getCdsMap(gene);
+				
+				// search matches are evaluated; the match key is an index into pepseqs resp. pepidx
+				for (Entry<Integer, Vector<Integer>> match : matches.entrySet()){
+					Peptide pep = pepset.get(pepidx.get(match.getKey()));
+					pep.setGeneAssigned(true);
+					pep.addGene(gene.getName());
+
+					mapMatchPositions(pep, gene, cdsMap, match.getValue(), ALTERNATIVE);
+				}
+			}
+
+			Configurator.checkBreak();
+		}
+		
+		info.stop(pepset.mapPepCount(ALTERNATIVE), pepset.size());
+	}
+
+	// translates every match position of one peptide in one gene into a genome Position and adds it to the peptide.
+	// index problems while translating are logged and do not abort the mapping.
+	private static void mapMatchPositions(Peptide pep, Gene gene, Vector<Integer> cdsMap, Vector<Integer> matchPositions, String method){
+		for (int pos : matchPositions){
+			int protStart = getProtStart(pep, gene, pos);
+			int protEnd = getProtEnd(pep, gene, pos);
+
+			try{
+				Position position = getPos(cdsMap, protStart, protEnd);
+				write(pep, gene, position, method);
+			}
+			catch (IndexOutOfBoundsException e){
+				String message = "mapping index error: cds of " + gene.getName() + " might be inconsistent (e.g. check cds coordinates for start and stop codons).";
+				Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+			}
+		}
+	}
+
+	// builds a CdsMap, this is vector containing each position in the cds in increasing order
+	// e.g. [4,5,6,8,9,13,14,15] where (4,5,6), (8,9) and (13,14,15) are the exons.
+	// each exon [start, end) is clipped to [cdsStart, cdsEnd) while it is added, so nothing has to be
+	// removed afterwards; an inverted cds range (cdsStart > cdsEnd) simply yields an empty map.
+	private static Vector<Integer> getCdsMap(Gene gene){
+		Vector<Integer> cdsMap = new Vector<Integer>();
+
+		int[] exonStarts = gene.getExonStarts();
+		int[] exonEnds = gene.getExonEnds();
+		int cdsStart = gene.getCdsStart();
+		int cdsEnd =  gene.getCdsEnd();
+
+		for (int i=0; i<exonStarts.length; i++){
+			int from = Math.max(exonStarts[i], cdsStart);
+			int to = Math.min(exonEnds[i], cdsEnd);
+			for (int j=from; j<to; j++){
+				cdsMap.add(j);
+			}
+		}
+
+		return cdsMap;
+	}
+
+	// corrects the peptide's start position in the gene's aminoacid sequence (resp. protein) depending on the strand.
+	// the result is multiplied by 3 to get from amino acid to nucleotide (cdsMap) indices.
+	private static int getProtStart(Peptide pep, Gene gene, int pos){
+		int protStartPos;
+
+		if (gene.getStrand() == '+'){
+			protStartPos = pos;
+		}
+		else{
+			protStartPos = ((gene.getAaSequence().length() + 1) - pos) - pep.getSequence().length();
+		}
+
+		return protStartPos * 3;
+	}
+
+	// corrects the peptide's end position in the gene's aminoacid sequence (resp. protein) depending on the strand.
+	// the result is multiplied by 3 to get from amino acid to nucleotide (cdsMap) indices.
+	private static int getProtEnd(Peptide pep, Gene gene, int pos){
+		int protEndPos;
+
+		if (gene.getStrand() == '+'){
+			protEndPos = pos + pep.getSequence().length();
+		}
+		else{
+			protEndPos = (gene.getAaSequence().length() + 1) - pos;
+		}
+
+		return protEndPos * 3;
+	}
+
+	// uses the peptide's in-protein position to find the in-genome position through taking the corresponding subset of the cdsMap.
+	// testing for gaps in the subset reveals if there are introns resp. if the peptide is located in more than one exon.
+	// the Position is returned to the caller (no shared static state); an empty range is reported as
+	// IndexOutOfBoundsException as well, so callers handle it like any other inconsistent cds.
+	private static Position getPos(Vector<Integer> cdsMap, int start, int end) throws IndexOutOfBoundsException{
+		Vector<Integer> pepMap = new Vector<Integer>(cdsMap.subList(start, end));
+		if (pepMap.isEmpty()){
+			throw new IndexOutOfBoundsException("empty cds range: " + start + " to " + end);
+		}
+
+		Position position = new Position();
+		
+		position.addStartPos(pepMap.firstElement());
+		for (int i=1; i<pepMap.size(); i++){
+			// a gap between two consecutive cds positions closes one exon part and opens the next
+			if (pepMap.get(i) != pepMap.get(i-1) + 1){
+				position.addStartPos(pepMap.get(i));
+				position.addEndPos(pepMap.get(i-1)+1);
+			}
+		}
+		position.addEndPos(pepMap.lastElement()+1);
+
+		return position;
+	}
+
+	// this method completes the calculated Position with the gene's data and adds it to the Peptide.
+	// returns true if the peptide accepted the position, false if it did not or if the position is malformed (which is logged).
+	private static boolean write(Peptide pep, Gene gene, Position position, String method){
+		if ((position.getStartPos().size() > 0) && (position.getEndPos().size() > 0)
+				&& (position.getStartPos().size() == position.getEndPos().size())){
+			position.setGeneName(gene.getName());
+			position.setChrom(gene.getChrom());
+			position.setStrand(gene.getStrand());
+			
+			if (pep.addPosition(position)){
+				pep.setMatchMethod(method);
+				pep.setGeneMapped(true);
+				gene.setUsed(true);
+				return true;
+			}
+			else return false;
+		}
+		else{
+			String message = "mapping error (peptide):\t" + pep.getRow() + "\n" + 
+					"mapping error (gene):\t" + gene.toString();
+			Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+			return false;
+		}
+	}
+
+}
diff --git a/src/de/rki/ng4/ipig/mapping/Position.java b/src/de/rki/ng4/ipig/mapping/Position.java
new file mode 100644
index 0000000..d63de61
--- /dev/null
+++ b/src/de/rki/ng4/ipig/mapping/Position.java
@@ -0,0 +1,198 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.mapping;
+
+import java.util.Comparator;
+import java.util.Vector;
+
+/**
+ * <p>A Position object is used to keep the information of a peptide-genome mapping, including e.g. the gene name,
+ * the chromosome, the strand and start and end positions (several if peptide is located in different exons).</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Position {
+	// variables to be set by a match
+	private String geneName;
+	private String chrom;
+	private char strand;
+	private Vector<Integer> startPos;
+	private Vector<Integer> endPos;
+
+	private boolean validated;
+	private String modifier;
+
+	/**
+	 * Simple initialization with empty start and end position lists.
+	 */
+	public Position(){
+		startPos = new Vector<Integer>();
+		endPos = new Vector<Integer>();
+	}
+
+	/**
+	 * Set the name of the gene where this Position is located.
+	 * 
+	 * @param name a gene name as String
+	 */
+	public void setGeneName(String name){
+		geneName = name;
+	}
+
+	/**
+	 * Returns the name of the gene where this Position is located.
+	 * 
+	 * @return the gene name
+	 */
+	public String getGeneName(){
+		return geneName;
+	}
+
+	/**
+	 * Set the chromosome of this Position (e.g. "chr12", "chrVI").
+	 * 
+	 * @param chr a chromosome name
+	 */
+	public void setChrom(String chr){
+		chrom = chr;
+	}
+
+	/**
+	 * Returns the chromosome where this Position is located.
+	 * 
+	 * @return a chromosome name
+	 */
+	public String getChrom(){
+		return chrom;
+	}
+
+	/**
+	 * Set the strand of this Position.
+	 * 
+	 * @param str the strand as char, either '+' or '-'
+	 */
+	public void setStrand(char str) {
+		strand = str;
+	}
+
+	/**
+	 * Returns the strand of this Position.
+	 * '+' for the positive strand and '-' for the negative one.
+	 * 
+	 * @return the strand as char, either '+' or '-'
+	 */
+	public char getStrand(){
+		return strand;
+	}
+
+	/**
+	 * Adds a start position (on a chromosome) to a vector of start positions.
+	 * @param pos The first or a further start position
+	 */
+	public void addStartPos(int pos){
+		startPos.add(pos);
+	}
+
+	/**
+	 * <p> Returns a Vector with all start positions. More than one start position indicates a peptide over more than one exon.</p>
+	 * <p> Use together with {@link #getEndPos()}, {@link #getChrom()} and {@link #getStrand()} for a complete localization.</p>
+	 * <p> The returned Vector is the internal list, not a copy.</p>
+	 * 
+	 * @return Vector of start positions
+	 */
+	public Vector<Integer> getStartPos(){
+		return startPos;
+	}
+
+	/**
+	 * Adds an end position (on a chromosome) to a vector of end positions.
+	 * @param pos The first or a further end position
+	 */
+	public void addEndPos(int pos) {
+		endPos.add(pos);
+	}
+
+	/**
+	 * <p> Returns a Vector with all end positions. More than one end position indicates a peptide over more than one exon.</p>
+	 * <p> Use together with {@link #getStartPos()}, {@link #getChrom()} and {@link #getStrand()} for a complete localization.</p>
+	 * <p> The returned Vector is the internal list, not a copy.</p>
+	 * 
+	 * @return Vector of end positions
+	 */
+	public Vector<Integer> getEndPos(){
+		return endPos;
+	}
+
+	/**
+	 * Set a boolean value, if this Position has been validated.
+	 * 
+	 * @param validated set true if validated, false else
+	 */
+	public void setValidated(boolean validated) {
+		this.validated = validated;
+	}
+	
+	/**
+	 * Returns if this Position has been validated (as set by {@link #setValidated(boolean)}).
+	 * 
+	 * @return true if validated, false else
+	 */
+	public boolean isValidated() {
+		return validated;
+	}
+	
+	/**
+	 * Set a modifier which can be used to e.g. differentiate Positions.
+	 * 
+	 * @param modifier arbitrary String
+	 */
+	public void setModifier(String modifier) {
+		this.modifier = modifier;
+	}
+	
+	/**
+	 * Returns the modifier.
+	 * 
+	 * @return modifier String if set, "" if null
+	 */
+	public String getModifier() {
+		if (modifier == null)
+			return "";
+		return modifier;
+	}
+
+	/**
+	 * <p>A Comparator to sort Positions by the chromosome, strand, start positions and end positions (in this order).</p>
+	 * 
+	 * <p>Start and end position lists are compared lexicographically (element by element, a shorter list that is a
+	 * prefix of the other one sorts first). The result is 0 only if chromosome, strand and both lists are equal,
+	 * and {@code compare(a, b)} always has the opposite sign of {@code compare(b, a)}, as the
+	 * {@link Comparator} contract requires for sorting and for sorted collections.</p>
+	 * 
+	 * @author Mathias Kuhring
+	 */
+	static public class PositionComparator implements Comparator<Position> {
+		@Override
+		public int compare(Position pos0, Position pos1) {
+			int result = pos0.getChrom().compareTo(pos1.getChrom());
+			if (result != 0){
+				return result;
+			}
+
+			result = ((Character) pos0.getStrand()).compareTo((Character) pos1.getStrand());
+			if (result != 0){
+				return result;
+			}
+
+			result = compareLists(pos0.getStartPos(), pos1.getStartPos());
+			if (result != 0){
+				return result;
+			}
+
+			return compareLists(pos0.getEndPos(), pos1.getEndPos());
+		}
+
+		// lexicographic comparison of two position lists; on a common prefix the shorter list is smaller
+		private static int compareLists(Vector<Integer> list0, Vector<Integer> list1){
+			int min = Math.min(list0.size(), list1.size());
+			for (int i=0; i<min; i++){
+				int result = list0.get(i).compareTo(list1.get(i));
+				if (result != 0){
+					return result;
+				}
+			}
+			if (list0.size() < list1.size()) return -1;
+			if (list0.size() > list1.size()) return 1;
+			return 0;
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/mapping/Validator.java b/src/de/rki/ng4/ipig/mapping/Validator.java
new file mode 100644
index 0000000..7f9f081
--- /dev/null
+++ b/src/de/rki/ng4/ipig/mapping/Validator.java
@@ -0,0 +1,107 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.mapping;
+
+import java.util.HashMap;
+import java.util.Vector;
+
+import de.rki.ng4.ipig.data.Gene;
+import de.rki.ng4.ipig.data.GeneSet;
+import de.rki.ng4.ipig.data.Peptide;
+import de.rki.ng4.ipig.data.PeptideSet;
+import de.rki.ng4.ipig.exceptions.ExitException;
+import de.rki.ng4.ipig.tools.Configurator;
+import de.rki.ng4.ipig.tools.Info;
+import de.rki.ng4.ipig.tools.Logger;
+import de.rki.ng4.ipig.tools.Translator;
+
+/**
+ * <p>This class can validate the position results. It's more for development testing than for end user usage,
+ * because it needs a GeneSet with nucleotide sequence data, which needs a lot of memory.</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Validator {
+
+	/**
+	 * For all Peptides the validation takes Genes nucleotide sequences and extracts the parts indicated by the Peptide's positions.
+	 * The parts are then translated to amino acid sequences and compared with the Peptide's sequence.
+	 * 
+	 * <p>A translation is only compared up to its first stop symbol ('*'). Mismatches are written to the log
+	 * and printed to the standard output after all genes have been processed. Every checked Position is marked as validated.</p>
+	 * 
+	 * @param peps peptides whose positions are to be checked
+	 * @param genes genes providing the nucleotide sequences
+	 * @throws ExitException declared for the calls into the tools package (e.g. the break check)
+	 */
+	public static void validate(PeptideSet peps, GeneSet genes) throws ExitException{
+		Info info = new Info("validate mappings");
+		int count = 0;
+
+		HashMap<String, Vector<Integer>> genemap = peps.getCommonGenes();
+		Translator trans = new Translator();
+		StringBuilder output = new StringBuilder();
+
+		for (int g=0; g<genes.size(); g++){
+			Gene gene = genes.get(g);
+			Vector<Integer> pepidx = genemap.get(gene.getName());
+			if (pepidx != null){
+				for (int i=0; i<pepidx.size(); i++){
+					Peptide pep = peps.get(pepidx.get(i));
+
+					for (Position pos : pep.getPositions()){
+						// plain string comparison: a gene name is a literal, not a regular expression
+						if (pos.getGeneName().equals(gene.getName()) && !pos.isValidated()){
+
+							String dna = getDna(pos,gene);
+							String peptide = cutAtStop(trans.dnaToPeptide(dna));
+							boolean match = pep.getSequence().equals(peptide);
+							if (!match){
+								String message = "wrong mapping: " + peptide + "<>" + pep.getSequence() + "\t" + 
+										pep.getMatchMethod() + "\t" + pos.getGeneName() + "\t" + pos.getChrom() + "\t" + pos.getStrand() 
+										+ "\t" +  pos.getStartPos() + "\t" + pos.getEndPos();
+								Logger.write(Configurator.getSysProperty("msPepSetName", "ipig")+".log", message);
+								output.append(message + "\n");
+							}
+							pos.setValidated(true);
+							count++;
+						}
+					}
+				}
+				Configurator.checkBreak();
+			}
+		}
+
+		info.stop(count, peps.mapPosCount());
+		System.out.print(output.toString());
+	}
+
+	// Returns the part of a translation in front of the first stop symbol ('*'),
+	// or the whole translation if it contains none. Also works if the translation starts with
+	// or only consists of stop symbols (result is "" then).
+	private static String cutAtStop(String translation){
+		int stop = translation.indexOf('*');
+		if (stop < 0){
+			return translation;
+		}
+		return translation.substring(0, stop);
+	}
+
+	// Extracts a part of a Gene's dna sequence indicated by a Position.
+	// Position coordinates are shifted by the gene's txStart to get indices into the gene's sequence.
+	// For genes on the '-' strand the concatenated parts are reversed and complemented.
+	private static String getDna(Position pos, Gene gene){
+		Vector<Integer> starts = pos.getStartPos();
+		Vector<Integer> ends = pos.getEndPos();
+		StringBuilder dna = new StringBuilder();
+
+		for (int i=0; i<starts.size(); i++){
+			int start = starts.get(i) - gene.getTxStart();
+			int end = ends.get(i) - gene.getTxStart();
+			dna.append(gene.getSequence().substring(start, end));
+		}
+
+		if (gene.getStrand() == '-'){
+			dna.reverse();
+			return Translator.complement(dna.toString());
+		}
+		else{
+			return dna.toString().toUpperCase();
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/mapping/WuManber.java b/src/de/rki/ng4/ipig/mapping/WuManber.java
new file mode 100644
index 0000000..db7bf8c
--- /dev/null
+++ b/src/de/rki/ng4/ipig/mapping/WuManber.java
@@ -0,0 +1,177 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.mapping;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Vector;
+
+/**
+ * <p>Implementation of the Wu-Manber algorithm for multi-pattern searching.</p>
+ * 
+ * <p>A short description: <a href="https://www.mi.fu-berlin.de/wiki/pub/ABI/AdvancedAlgorithms11_Searching/script-01_02-Intro_HorspoolWuManber.pdf">
+ * https://www.mi.fu-berlin.de/wiki/pub/ABI/AdvancedAlgorithms11_Searching/script-01_02-Intro_HorspoolWuManber.pdf</a> (Page 7 and 8).</p>
+ * 
+ * <p>An instance keeps the text and the results of the last search, so the map returned by
+ * {@link #searchIn(String)} is only valid until the next call of that method.</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class WuManber {
+
+	// block -> number of text positions that can safely be skipped when the block is read
+	private HashMap<String,Integer> shift;
+	// last block of a pattern -> indices of all patterns ending with this block
+	private HashMap<String,Vector<Integer>> hash;
+
+	// pattern index -> match positions of the last search
+	private HashMap<Integer,Vector<Integer>> results;
+
+	private Vector<String> patterns;
+	private String text;
+
+	private int alphabetSize;
+	private int blockSize;
+	private int lmin, lmax, defaultValue;
+
+	/**
+	 * The constructor takes a list of patterns and prepares the lookup tables (preprocessing).
+	 * 
+	 * @param patterns the list (Vector) of patterns to be searched, none of them may be an empty String
+	 * @param alphabetSize the alphabet size, to determine an optimal block size
+	 * @throws IllegalArgumentException if one of the patterns is an empty String
+	 */
+	public WuManber(Vector<String> patterns, int alphabetSize){
+		shift = new HashMap<String,Integer>();
+		hash = new HashMap<String,Vector<Integer>>();
+		results = new HashMap<Integer,Vector<Integer>>();
+		this.patterns = patterns;
+		this.alphabetSize = alphabetSize;
+		preprocessing();
+	}
+
+	/**
+	 * The searchIn method takes a text and returns matches of the patterns in this text.
+	 * The results are given as a HashMap with indices to the pattern input Vector as keys and Vectors containing the match positions as values.
+	 * Patterns without a match have no entry. A match position is the index of the pattern's first character in the text.
+	 * 
+	 * <p>The returned map is reused: it is cleared and refilled by the next call of this method.</p>
+	 * 
+	 * @param text the text in which it is searched
+	 * @return HashMap with pattern indices as keys and match position lists as values
+	 */
+	public HashMap<Integer,Vector<Integer>> searchIn(String text){
+		this.text = text;
+		results.clear();
+
+		// pos is the text index of the last character of the current block
+		int pos = lmin-1;
+
+		while (pos < text.length()){
+			String key = text.substring(pos-blockSize+1, pos+1);
+			Integer jump = shift.get(key);
+			if (jump == null){
+				// block occurs in no pattern
+				pos = pos + defaultValue;
+			}
+			else if (jump == 0){
+				// block is the end of at least one pattern
+				verification(pos, hash.get(key));
+				pos++;
+			}
+			else{
+				pos = pos + jump;
+			}
+		}
+
+		return results;
+	}
+
+	// the wu-manber verification step: checks which of the candidate patterns really end at text position pos
+	private void verification(int pos, Vector<Integer> indices){
+		String part = text.substring(Math.max(0,(pos-lmax+1)), pos+1);
+		String pattern;
+		for (int i : indices){
+			pattern = patterns.get(i);
+			if (part.endsWith(pattern)){
+				if (!results.containsKey(i))
+					results.put(i, new Vector<Integer>());
+				results.get(i).add(pos-pattern.length()+1);
+			}
+		}
+	}
+
+	// calculation of lmin, lmax, the block size and a default value for the shift table.
+	private void preprocessing(){
+		lmin = Integer.MAX_VALUE;
+		lmax = Integer.MIN_VALUE;
+		for (String p : this.patterns){
+			if (p.length() == 0){
+				throw new IllegalArgumentException("patterns must not contain empty strings");
+			}
+			lmin = Math.min(lmin, p.length());
+			lmax = Math.max(lmax, p.length());
+		}
+		int estimate = (int) (Math.log(2.0 * lmin * patterns.size()) / Math.log(alphabetSize));
+		// a block has to fit into the shortest pattern and needs at least one character;
+		// without the limits, many short patterns make the table construction fail
+		// and few patterns lead to empty blocks (every text position is verified)
+		blockSize = Math.max(1, Math.min(lmin, estimate));
+		defaultValue = lmin - blockSize + 1;
+		buildShift();
+		buildHash();
+	}
+
+	// building the shift table: for each block of each pattern the smallest distance to the pattern's end
+	private void buildShift(){
+		String block;
+		for (String pattern : patterns){
+			for (int i=0; i<pattern.length()-blockSize+1; i++){
+				block = pattern.substring(i, i+blockSize);
+				if (shift.get(block) == null){
+					shift.put(block, Math.min(defaultValue, pattern.length()-(i+blockSize)));
+				}
+				else{
+					shift.put(block, Math.min(shift.get(block), pattern.length()-(i+blockSize)));
+				}
+			}
+		}
+	}
+
+	// building the hash table: patterns are grouped by their last block
+	private void buildHash(){
+		String block;
+		for (int i=0; i<patterns.size(); i++){
+			block = patterns.get(i).substring(patterns.get(i).length()-blockSize, patterns.get(i).length());
+			if (!hash.containsKey(block))
+				hash.put(block, new Vector<Integer>());
+			hash.get(block).add(i);
+		}
+	}
+
+	/**
+	 * <p>A simple file output of the results. The text is printed in one row and the patterns are printed in the next row
+	 * according to all their matching positions. Overlaps of patterns are not marked.</p>
+	 * 
+	 * <p>This method only builds a file if the results list is not empty, so usually after a search pass.
+	 * I/O problems are not thrown, only their message is printed to the standard output.</p>
+	 * 
+	 * @param filename name for the file
+	 */
+	public void printToFile(String filename){
+		if (results.isEmpty()){
+			return;
+		}
+
+		BufferedWriter resBuffer = null;
+		try {
+			resBuffer = new BufferedWriter (new FileWriter(new File(filename)));
+			StringBuilder output = new StringBuilder();
+			for (int i=0; i<text.length(); i++){
+				output.append('_');
+			}
+			int idx;
+			Vector<Integer> pos;
+			for (Entry<Integer, Vector<Integer>> entry : results.entrySet()){
+				idx = entry.getKey();
+				pos = entry.getValue();
+				for (int p : pos)
+					output.replace(p, p+patterns.get(idx).length(), patterns.get(idx));
+			}
+			resBuffer.write(text);
+			resBuffer.newLine();
+			resBuffer.write(output.toString());
+		} catch (IOException e) {
+			System.out.println(e.getMessage());
+		} finally {
+			// close the file even if writing failed
+			if (resBuffer != null){
+				try {
+					resBuffer.close();
+				} catch (IOException e) {
+					System.out.println(e.getMessage());
+				}
+			}
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/tools/Configurator.java b/src/de/rki/ng4/ipig/tools/Configurator.java
new file mode 100644
index 0000000..45a1f22
--- /dev/null
+++ b/src/de/rki/ng4/ipig/tools/Configurator.java
@@ -0,0 +1,185 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.tools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Properties;
+
+import de.rki.ng4.ipig.exceptions.ExitException;
+
+/**
+ * <p>The Configurator improves the {@link Properties} class with static methods (it is usable like a singleton),
+ * so configuration parameters are available everywhere in the program.</p>
+ * 
+ * <p>The Configurator contains two Properties objects, one for "user" parameters which can be saved and loaded (e.g. for parameter control by a user)
+ * and one for "system" parameters which can't be saved or loaded (so for code-internal or temporary use only).</p> 
+ * 
+ * <p>This class also provides set and check methods for a breakpoint which might be useful for a multi-threaded program,
+ * to exit some part of the program properly without killing something in a bad moment (e.g. while streams are open).</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Configurator {
+
+	private static final Properties user = new Properties();
+	private static final Properties system = new Properties();
+
+	/**
+	 * <p>Loads "user" properties from a file. The file is closed again even if reading it fails.</p>
+	 * <p>For more information about what such files look like see {@link Properties}.</p>
+	 * 
+	 * @param filename name of the properties file
+	 * @throws ExitException if the file cannot be found or read
+	 */
+	public static void loadProperties(String filename) throws ExitException{
+		try {
+			BufferedReader confBuffer = new BufferedReader(new FileReader(new File(filename)));
+			try {
+				user.load(confBuffer);
+			} finally {
+				confBuffer.close();
+			}
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		}
+	}
+	
+	/**
+	 * <p>Saves "user" properties to a file. The file is closed again even if writing it fails.</p>
+	 * @param filename name of the properties file
+	 * @throws ExitException if the file cannot be created or written
+	 */
+	public static void saveProperties(String filename) throws ExitException{
+		try {
+			BufferedWriter confBuffer = new BufferedWriter(new FileWriter(new File(filename)));
+			try {
+				user.store(confBuffer, null);
+			} finally {
+				confBuffer.close();
+			}
+		} catch (IOException e) {
+			throw new ExitException(e.getMessage());
+		}
+	}
+
+	/**
+	 * Returns a "user" property indicated by a name (key), if available.
+	 * 
+	 * @param key name of the property
+	 * @return property's value if available, null else
+	 */
+	public static String getProperty(String key){
+		return user.getProperty(key);
+	}
+
+	/**
+	 * Returns a "user" property indicated by a name (key) or if not available a given default value.
+	 * 
+	 * @param key name of the property
+	 * @param defaultValue a default return value
+	 * @return property's value if available, default value else
+	 */
+	public static String getProperty(String key, String defaultValue){
+		return user.getProperty(key, defaultValue);
+	}
+
+	/**
+	 * Sets a "user" property indicated by a name (key) with an arbitrary value.
+	 * 
+	 * @param key name of the property
+	 * @param value value for the property
+	 */
+	public static void setProperty(String key, String value) {
+		user.setProperty(key, value);
+	}
+
+	/**
+	 * Returns a copy of all properties, incl. "user" and "system" properties ("system" wins if a key is in both).
+	 * 
+	 * @return a new Properties object containing all properties set or loaded so far
+	 */
+	public static Properties getProperties() {
+		Properties output = new Properties();
+		output.putAll(user);
+		output.putAll(system);
+		return output;
+	}
+	
+	/**
+	 * Returns a "system" property indicated by a name (key), if available.
+	 * 
+	 * @param key name of the property
+	 * @return property's value if available, null else
+	 */
+	public static String getSysProperty(String key){
+		return system.getProperty(key);
+	}
+
+	/**
+	 * Returns a "system" property indicated by a name (key) or if not available a given default value.
+	 * 
+	 * @param key name of the property
+	 * @param defaultValue a default return value
+	 * @return property's value if available, default value else
+	 */
+	public static String getSysProperty(String key, String defaultValue){
+		return system.getProperty(key, defaultValue);
+	}
+
+	/**
+	 * Sets a "system" property indicated by a name (key) with an arbitrary value.
+	 * 
+	 * @param key name of the property
+	 * @param value value for the property
+	 */
+	public static void setSysProperty(String key, String value) {
+		system.setProperty(key, value);
+	}
+	
+	/**
+	 * <p>Sets a breakpoint (or stop signal) which can be used with {@link #checkBreak()}; it is kept as "system" property "break".</p>
+	 * 
+	 * @param brk true activates the breakpoint
+	 */
+	public static void setBreak(boolean brk){
+		system.setProperty("break", Boolean.toString(brk));
+	}
+	
+	/**
+	 * <p>Throws an ExitException if the breakpoint is activated with {@link #setBreak(boolean)}.</p>
+	 * 
+	 * @throws ExitException if the "system" property "break" is exactly "true"
+	 */
+	public static void checkBreak() throws ExitException{
+		if ("true".equals(system.getProperty("break", "false"))){
+			throw new ExitException("stop by user\n");
+		}
+	}
+	
+	/**
+	 * Removes a "user" property indicated by a name (key).
+	 * 
+	 * @param key name of the property to remove
+	 */
+	public static void removeProperty(String key){
+		user.remove(key);
+	}
+	
+	/**
+	 * Removes a "system" property indicated by a name (key).
+	 * 
+	 * @param key name of the property to remove
+	 */
+	public static void removeSysProperty(String key){
+		system.remove(key);
+	}
+}
diff --git a/src/de/rki/ng4/ipig/tools/Info.java b/src/de/rki/ng4/ipig/tools/Info.java
new file mode 100644
index 0000000..0143f32
--- /dev/null
+++ b/src/de/rki/ng4/ipig/tools/Info.java
@@ -0,0 +1,85 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.tools;
+
+/**
+ * <p>Small helper which prints status messages of a procedure on standard output.</p>
+ * 
+ * <p>Two levels of detail (info modes) are available, see {@link #setInfomode(boolean)}.</p>
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class Info {
+
+	private static final String space = "\t";
+	private static boolean infomode = false;
+
+	private long begin;
+	private long end;
+
+	/**
+	 * <p>Creates an Info object and directly prints the start message followed by "..." and a tab.
+	 * The current time is taken as starting point of the runtime measurement.</p>
+	 * 
+	 * @param message the start message
+	 */
+	public Info(String message){
+		System.out.print(message + "..." + space);
+		begin = System.currentTimeMillis();
+	}
+
+	// runtime in whole seconds between the construction and the latest stop call
+	private long seconds(){
+		return (end-begin)/1000;
+	}
+
+	/**
+	 * <p>Prints a simple "done", or with info mode on ({@link #setInfomode(boolean)}) a self-indicated ratio and the runtime.</p>
+	 * 
+	 * <p>E.g. "processed 30 of 50 in 5 s"</p>
+	 * 
+	 * @param part first part of the ratio
+	 * @param all second part of the ratio
+	 */
+	public void stop(int part, int all){
+		end = System.currentTimeMillis();
+		if (!infomode){
+			System.out.println("done");
+			return;
+		}
+		System.out.println("processed " + part + " of " + all + " in " + seconds() + " s");
+	}
+
+	/**
+	 * <p>Prints a simple "done" message, or with info mode on (see {@link #setInfomode(boolean)})
+	 * "done" together with the runtime.</p>
+	 * 
+	 * <p>E.g. "done in 5 s"</p>
+	 */
+	public void stop(){
+		end = System.currentTimeMillis();
+		if (!infomode){
+			System.out.println("done");
+			return;
+		}
+		System.out.println("done" + " in " + seconds() + " s");
+	}
+
+	/**
+	 * <p>Switches the info mode for all Info objects.</p>
+	 * 
+	 * <p>With info mode off (false), an Info object only prints plain start and stop messages.</p>
+	 * 
+	 * <p>With info mode on (true), an Info object additionally prints the procedure's runtime in seconds
+	 * and, if wanted, an arbitrary self-indicated ratio.</p>
+	 * 
+	 * @param mode true for info mode on and false for info mode off
+	 */
+	public static void setInfomode(boolean mode){
+		infomode = mode;
+	}
+}
diff --git a/src/de/rki/ng4/ipig/tools/Logger.java b/src/de/rki/ng4/ipig/tools/Logger.java
new file mode 100644
index 0000000..f1a9946
--- /dev/null
+++ b/src/de/rki/ng4/ipig/tools/Logger.java
@@ -0,0 +1,55 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.tools;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/**
+ * Simple logging class. Every call opens the log file, writes one line and closes the file again.
+ * 
+ * @author Mathias Kuhring
+ */
+public class Logger {
+	
+	/**
+	 * Initiates a logging file with date and time ("dd.MM.yyyy HH:mm:ss"). I/O errors are printed to standard output.
+	 * 
+	 * @param filename name of the log file
+	 * @param append true if existing data in the log file should be kept, false else
+	 */
+	public static void init(String filename, boolean append){
+		writeLine(filename, new SimpleDateFormat("dd.MM.yyyy HH:mm:ss").format(new Date()), append);
+	}
+
+	/**
+	 * Appends text to a logging file. I/O errors are printed to standard output.
+	 * 
+	 * @param filename name of the log file
+	 * @param message text to be appended
+	 */
+	public static void write(String filename, String message){
+		writeLine(filename, message, true);
+	}
+
+	// writes one line; the writer is closed in a finally block, so a failing write cannot leak the open file
+	private static void writeLine(String filename, String line, boolean append){
+		try {
+			BufferedWriter logBuffer = new BufferedWriter(new FileWriter(new File(filename),append));
+			try {
+				logBuffer.write(line);
+				logBuffer.newLine();
+			} finally {
+				logBuffer.close();
+			}
+		} catch (IOException e) {
+			System.out.println(e.getMessage());
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/tools/MascotCSV2TXT.java b/src/de/rki/ng4/ipig/tools/MascotCSV2TXT.java
new file mode 100644
index 0000000..bb0e3c5
--- /dev/null
+++ b/src/de/rki/ng4/ipig/tools/MascotCSV2TXT.java
@@ -0,0 +1,165 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.tools;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.swing.JFileChooser;
+import javax.swing.UIManager;
+import javax.swing.filechooser.FileNameExtensionFilter;
+
+/**
+ * Tool to convert a Mascot CSV to a tab separated txt file with the columns and the format required by the ipig.
+ * 
+ * This tool is not fully tested, so it may only work with fully exported Mascot CSVs.
+ * It is more for development use and it is actually not a part of the ipig yet.
+ * 
+ * @author Mathias Kuhring
+ *
+ */
+public class MascotCSV2TXT {
+
+	// columns which are copied to the output if the Mascot header contains them
+	private static final String[] COLUMNS = {"prot_acc", "prot_desc", "pep_query", "pep_isunique", "pep_exp_z", "pep_score", "pep_seq", "pep_var_mod", "pep_var_mod_pos"};
+
+	// one CSV field (quoted, "" being an escaped quote, or unquoted) with its optional separator or line end
+	private static final Pattern FIELD = Pattern.compile("(\"(?:[^\"]|\"\")*\"|[^\",\r\n]*)(,|\r\n?|\n)?");
+
+	/**
+	 * Converts the Mascot CSV given as first argument, or chosen in a file dialog if there is no argument.
+	 * The result is written next to the input file, with the extension replaced by ".txt".
+	 * 
+	 * @param args optional path of the Mascot CSV file
+	 */
+	public static void main(String[] args) {
+		final Vector<String> colNames = new Vector<String>(Arrays.asList(COLUMNS));
+
+		File file = args.length == 0 ? chooseFile() : new File(args[0]);
+		if (file == null)
+			System.exit(0);
+
+		// a name without a dot is used unchanged instead of failing in substring(0, -1)
+		int dot = file.getName().lastIndexOf(".");
+		String filename = dot < 0 ? file.getName() : file.getName().substring(0, dot);
+		// getParentFile() is null for a bare file name; File(null, child) then means the working directory
+		File target = new File(file.getParentFile(), filename + ".txt");
+
+		BufferedReader input = null;
+		BufferedWriter output = null;
+		try{
+			input = new BufferedReader(new FileReader(file));
+
+			// find the header line and remember the positions of the wanted columns
+			Vector<String> fields = new Vector<String>();
+			Vector<Integer> cols = new Vector<Integer>();
+			boolean unique = false;
+			String line;
+			while ((line = input.readLine()) != null){
+				if (line.startsWith("prot_hit_num")){
+					fields = parseFields(line);
+					for (int i=0; i<fields.size(); i++){
+						if (colNames.contains(fields.get(i)))
+							cols.add(i);
+						if (fields.get(i).equals("pep_isunique"))
+							unique = true;
+					}
+					break;
+				}
+			}
+
+			if (cols.isEmpty()){
+				System.out.println("error:\tcouldn't find nessecary data in " + file.getName() +
+						"\n\t" + "header should start with \"prot_hit_num\" and contain following fields:" +
+						"\n\t" + colNames.toString() +
+						"\n\t" + "(if pep_isunique is missing it is assumed that peptides are unique)");
+				return;
+			}
+
+			// the output file is only created once the input has proven to be usable
+			output = new BufferedWriter(new FileWriter(target));
+			writeRow(output, fields, cols, unique ? null : "pep_isunique");
+			while ((line = input.readLine()) != null)
+				writeRow(output, parseFields(line), cols, unique ? null : "1");
+
+			// closing flushes the buffer, so "done" is only reported if that worked as well
+			output.close();
+			System.out.println("done");
+		}
+		catch (IOException e) {
+			System.out.println(e.getLocalizedMessage());
+		}
+		finally {
+			closeQuietly(input);
+			closeQuietly(output);
+		}
+	}
+
+	// shows a file dialog with the native look and feel; returns null if the user cancels
+	private static File chooseFile(){
+		try {
+			UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
+		} catch(Exception e) {
+			System.out.println("Error setting native LAF: " + e);
+		}
+		JFileChooser fc = new JFileChooser();
+		fc.setFileFilter(new FileNameExtensionFilter("mascot csv", "csv"));
+		return fc.showOpenDialog( null ) == JFileChooser.APPROVE_OPTION ? fc.getSelectedFile() : null;
+	}
+
+	// splits a CSV line into fields without separators, enclosing quotes and leading blanks
+	private static Vector<String> parseFields(String line){
+		Vector<String> fields = new Vector<String>();
+		Matcher m = FIELD.matcher(line);
+		while ( m.find() ){
+			String field = m.group();
+			if (field.endsWith(",") || field.endsWith(";"))
+				field = field.substring(0, field.length()-1);
+			while (field.startsWith("\"") && field.endsWith("\""))
+				field = field.substring(1, field.length()-1);
+			while (field.startsWith(" "))
+				field = field.substring(1, field.length());
+			fields.add(field);
+		}
+		return fields;
+	}
+
+	// writes the selected columns of one row separated by tabs; extra, if not null, is inserted as fourth
+	// column (or appended if there are fewer than three columns) to stand in for a missing pep_isunique
+	private static void writeRow(BufferedWriter output, Vector<String> fields, Vector<Integer> cols, String extra) throws IOException{
+		Vector<String> row = new Vector<String>();
+		for (int col : cols)
+			row.add(fields.get(col));
+		if (extra != null)
+			row.add(Math.min(3, row.size()), extra);
+		for (int i=0; i<row.size(); i++){
+			if (i > 0)
+				output.write("\t");
+			output.write(row.get(i));
+		}
+		output.newLine();
+	}
+
+	// closes a stream on the failure path, where a second error message would only hide the first one
+	private static void closeQuietly(java.io.Closeable stream){
+		if (stream == null)
+			return;
+		try {
+			stream.close();
+		} catch (IOException ignored) {
+			// nothing sensible left to do here
+		}
+	}
+}
diff --git a/src/de/rki/ng4/ipig/tools/Translator.java b/src/de/rki/ng4/ipig/tools/Translator.java
new file mode 100644
index 0000000..850de0a
--- /dev/null
+++ b/src/de/rki/ng4/ipig/tools/Translator.java
@@ -0,0 +1,220 @@
+/* Copyright (c) 2012,
+ * Mathias Kuhring, KuhringM at rki.de, Robert Koch-Institut, Germany, 
+ * All rights reserved. For details, please note the license.txt.
+ */
+
+package de.rki.ng4.ipig.tools;
+
+import java.util.HashMap;
+
+/**
+ * <p>The Translator provides simple dna translators, one for the common dna to peptide translation 
+ * and another for a strand to complement strand translation.</p>
+ * 
+ * @author Mathias Kuhring
+ */
+public class Translator {
+
+	private HashMap<String,String> codonTable;
+	
+	/**
+	 * The constructor creates the HashMap for the codon-aminoacid relations and fills it with a standard codon table.
+	 */
+	public Translator(){
+		codonTable = new HashMap<String,String>();
+		initializeCodonTable();
+	}
+	
+	/**
+	 * <p>Translates a dna string into a peptide in one-letter code using the codon table; the dna is upper-cased first.</p>
+	 *  
+	 *  <p>Note: this method handles only valid codons. A dna string including other characters than ACGT (e.g. N)
+	 *  will produce a non-valid codon. This leads to "null" in the peptide and therefore messes the peptide length up.</p>
+	 *  
+	 *  <p>Note: only dna strings with a length completely divisible by 3 are considered, as a codon has length 3.
+	 *  For strings with any other length the method returns null.</p>
+	 *  
+	 * @param dna a dna string
+	 * @return a one-letter coded peptide string (stop codons appear as "*") or null if the dna length is not a multiple of 3
+	 */
+	public String dnaToPeptide(String dna){
+		if (dna.length() % 3 != 0)
+			return null;
+		
+		StringBuffer peptide = new StringBuffer();
+		String codon;
+		String aa;
+		
+		dna = dna.toUpperCase();
+		for (int i=0; i<dna.length(); i+=3){
+			codon = dna.substring(i,i+3);
+			aa = codonTable.get(codon);
+			peptide.append(aa); // aa is null for a codon missing in the table; append then adds the text "null"
+		}
+		
+		return peptide.toString();
+	}
+	
+	/**
+	 * <p>Builds the complement string of a dna string.</p>
+	 * 
+	 * <p>This includes A->T, C->G, G->C and T->A. Other characters (e.g. U or N) are not supported and will not be changed.</p>
+	 * 
+	 * <p>Note: the output string is not reversed!</p>
+	 * 
+	 * @param dna a dna string
+	 * @return the complement of the dna string, in upper case and in the same order as the input
+	 */
+	public static String complement(String dna){
+		String tmp = dna.toUpperCase();
+		StringBuffer compl = new StringBuffer();
+
+		for (int i=0; i<tmp.length(); i++){
+			switch (tmp.charAt(i)){
+			case 'A':
+				compl.append('T');
+				break;
+			case 'C':
+				compl.append('G');
+				break;
+			case 'G':
+				compl.append('C');
+				break;
+			case 'T':
+				compl.append('A');
+				break;
+			default:
+				compl.append(tmp.charAt(i)); // unsupported characters are copied as they are (already upper-cased)
+			}
+		}
+
+		return compl.toString();
+	}
+	
+	// fills codonTable with all 64 codons over T, C, A, G; the stop codons TAA, TAG and TGA are stored as "*"
+	private void initializeCodonTable(){
+
+		String[] bases = {"T","C","A","G"};
+
+		for(String base1 : bases){
+			for(String base2 : bases){
+				for(String base3 : bases){
+
+					if(base1.equals("T")){
+
+						if(base2.equals("T")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "F");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "L");
+							}
+						}else if(base2.equals("C")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "S");
+						}else if(base2.equals("A")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "Y");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "*");
+							}
+						}else if(base2.equals("G")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "C");
+							}else if(base3.equals("A")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "*");
+							}else if(base3.equals("G")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "W");
+							}
+						}
+
+					}
+
+					if(base1.equals("C")){
+
+						if(base2.equals("T")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "L");
+						}else if(base2.equals("C")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "P");
+						} else if(base2.equals("A")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "H");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "Q");
+							}
+						} else if(base2.equals("G")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "R");
+						}
+
+					}
+
+					if(base1.equals("A")){
+
+						if(base2.equals("T")){
+							if(base3.equals("G")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "M");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "I");
+							}
+						}else if(base2.equals("C")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "T");
+						} else if(base2.equals("A")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "N");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "K");
+							}
+						} else if(base2.equals("G")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "S");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "R");
+							}
+						}
+
+					}
+
+					if(base1.equals("G")){
+
+						if(base2.equals("T")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "V");
+						}else if(base2.equals("C")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "A");
+						} else if(base2.equals("A")){
+							if(base3.equals("T") || base3.equals("C")){
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "D");
+							}else{
+								String codon = base1+base2+base3;
+								codonTable.put(codon, "E");
+							}
+						}else if(base2.equals("G")){
+							String codon = base1+base2+base3;
+							codonTable.put(codon, "G");
+						}
+					}
+				}
+			}
+		}
+	}
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ipig.git



More information about the debian-med-commit mailing list